diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 46ad94c..5b3bf45 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,6 +12,8 @@ on: jobs: build-and-test: runs-on: [self-hosted, gpu] + env: + EVM_FORK: SHANGHAI services: docker: @@ -20,7 +22,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: path: ${{ github.workspace }}/${{ github.run_id }} @@ -38,7 +40,7 @@ jobs: - name: Pull cached Docker image run: | cd ${{ github.workspace }}/${{ github.run_id }} - docker pull augustus/goevmlab-cuevm:20241008 || true + docker pull augustus/goevmlab-cuevm:20241216 || true - name: Start cuevm-test-runner container run: | @@ -52,7 +54,10 @@ jobs: run: | cd ${{ github.workspace }}/${{ github.run_id }} docker exec cuevm-test-runner-${{ github.run_id }} /bin/bash -c " - cmake -S . -B build -DTESTS=OFF -DGPU=ON -DCPU=OFF -DCUDA_COMPUTE_CAPABILITY=86 -DENABLE_EIP_3155_OPTIONAL=OFF -DENABLE_EIP_3155=ON + python3 -m ensurepip --upgrade + python3 -m pip install --no-cache-dir --upgrade cmake==4.2.1 + export PATH=\"/root/.local/bin:\$PATH\" + cmake -S . -B build -DTESTS=OFF -DGPU=ON -DCPU=OFF -DEVM_VERSION=${EVM_FORK} -DCUDA_COMPUTE_CAPABILITY=\"103-real;103-virtual\" -DENABLE_EIP_3155_OPTIONAL=OFF -DENABLE_EIP_3155=ON cmake --build build -j 8 " @@ -83,7 +88,7 @@ jobs: - name: Archive test results - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: test-results path: ${{ github.workspace }}/${{ github.run_id }}/test-outputs diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..3691318 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,60 @@ +# AGENTS.md — Execution Guide for Advanced Contributors + +This document defines how an advanced agent should implement the remaining work to achieve a **GPU‑only, world‑class CuEVM fuzzing stack** on NVIDIA B300‑class GPUs. 
+
+## Mission
+Deliver maximum‑coverage, GPU‑only fuzzing with multi‑sequence, cross‑contract search and invariant‑based oracles, while keeping the codebase stable, reproducible, and production‑ready.
+
+## Operating principles
+- Work in **small, reviewable increments**.
+- Keep the system **GPU‑only** for fuzzing (do not depend on CPU‑based gating in the fuzz path).
+- Add **measurements first**, then optimize.
+- Ensure changes are deterministic and reproducible.
+
+## Repository map (key areas)
+- `fuzzing/` — GPU fuzzing harness, configs, invariants.
+- `CuEVM/` — core GPU engine and execution semantics.
+- `tests/` — GPU/CPU tests and fixtures.
+- `scripts/` — CI helpers and test runners.
+
+## Implementation checklist (apply in order)
+1. **Fork coverage**
+   - Implement the Foundry-compatible fork and remove the obsolete legacy implementation.
+   - Validate the new fork behaviour against the existing reference tests.
+
+2. **Coverage instrumentation**
+   - Add on‑GPU counters for branches, opcodes, and storage writes.
+   - Export coverage maps per batch and merge into a global map.
+
+3. **Stateful multi‑sequence search**
+   - Extend the fuzzer to mutate sequences (insert/delete/reorder).
+   - Add sender/role, value, and block‑context mutation.
+   - Support cross‑contract call graphs and receiver pools.
+
+4. **Invariant engine**
+   - Implement invariant templates (ERC‑20/4626/AMM/lending).
+   - Add config‑driven invariants per target contract.
+   - Prioritize cases that violate invariants and retain in corpus.
+
+5. **Corpus + minimization**
+   - Keep a GPU‑only corpus of interesting sequences.
+   - Implement minimization to produce small, reproducible JSON tests.
+
+6. **GPU throughput + profiling**
+   - Auto‑tune batch sizing for B300 occupancy.
+   - Add timing metrics and Nsight Systems hooks.
+
+7. **Observability + reliability**
+   - Emit structured logs with coverage and invariant stats.
+   - Add failure recovery and checkpointing.
+
+## Required quality gates
+- Run targeted GPU fuzz smoke tests before merging changes.
+- Keep all changes behind configurable flags (opt‑in where needed). +- Maintain consistent formatting and avoid unrelated refactors. + +## Useful commands +- Configure (requires CMake 4.2+): + - `cmake -S . -B build -DTESTS=ON -DTESTS_GPU=OFF -DENABLE_EIP_3155=ON` +- Example GPU fuzz run: + - `python fuzzing/fuzzer.py --input fuzzing/contracts/erc20.sol --config fuzzing/configurations/default.json --num_instances 256 --num_iterations 100` diff --git a/CMakeLists.txt b/CMakeLists.txt index 6b39a74..2947819 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.22 FATAL_ERROR) +cmake_minimum_required(VERSION 3.20 FATAL_ERROR) if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type" FORCE) endif() @@ -25,7 +25,7 @@ enable_language(CUDA) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CUDA_STANDARD 20) -set(CUDA_COMPUTE_CAPABILITY "50" CACHE STRING "CUDA Compute Capability") +set(CUDA_COMPUTE_CAPABILITY "103-real;103-virtual" CACHE STRING "CUDA Compute Capability (e.g. 103-real;103-virtual for NVIDIA B300)") set(CMAKE_CUDA_ARCHITECTURES ${CUDA_COMPUTE_CAPABILITY}) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) diff --git a/CuEVM/CMakeLists.txt b/CuEVM/CMakeLists.txt index 7910197..50d6a04 100644 --- a/CuEVM/CMakeLists.txt +++ b/CuEVM/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.22 FATAL_ERROR) +cmake_minimum_required(VERSION 4.2 FATAL_ERROR) if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type" FORCE) endif() @@ -23,7 +23,7 @@ enable_language(CUDA) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CUDA_STANDARD 20) if (NOT CUDA_COMPUTE_CAPABILITY) - set(CUDA_COMPUTE_CAPABILITY "50" CACHE STRING "CUDA Compute Capability") + set(CUDA_COMPUTE_CAPABILITY "103-real;103-virtual" CACHE STRING "CUDA Compute Capability (e.g. 
103-real;103-virtual for NVIDIA B300)") endif() if (NOT CMAKE_CUDA_ARCHITECTURES) set(CMAKE_CUDA_ARCHITECTURES ${CUDA_COMPUTE_CAPABILITY}) @@ -71,8 +71,13 @@ target_link_libraries(${PROJECT_NAME} PRIVATE CGBN CuCrypto CuBigInt) # then the external ones target_link_libraries(${PROJECT_NAME} PUBLIC gmp cjson ${CUDA_LIBRARIES}) +# Add curand for GPU fuzzing RNG +find_library(CURAND_LIBRARY curand HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +if(CURAND_LIBRARY) + target_link_libraries(${PROJECT_NAME} PUBLIC ${CURAND_LIBRARY}) +endif() + # Add specific NVCC flags using target_compile_options (if necessary) target_compile_options(${PROJECT_NAME} PRIVATE $<$:-lineinfo --std=c++20 -rdc=true --expt-relaxed-constexpr>) target_compile_definitions(${PROJECT_NAME} PRIVATE CGBN_TPI=${CGBN_TPI}) - diff --git a/CuEVM/include/CuEVM/fuzzing/corpus.cuh b/CuEVM/include/CuEVM/fuzzing/corpus.cuh new file mode 100644 index 0000000..b39e310 --- /dev/null +++ b/CuEVM/include/CuEVM/fuzzing/corpus.cuh @@ -0,0 +1,458 @@ +// CuEVM: CUDA Ethereum Virtual Machine implementation +// GPU Corpus Management for Smart Contract Fuzzing +// SPDX-License-Identifier: MIT + +#ifndef _CUEVM_FUZZING_CORPUS_H_ +#define _CUEVM_FUZZING_CORPUS_H_ + +#include +#include +#include +#include +#include + +namespace CuEVM { +namespace fuzzing { + +// ============================================================================ +// Corpus Configuration +// ============================================================================ + +constexpr uint32_t MAX_CORPUS_SIZE = 65536; // Max seeds in corpus +constexpr uint32_t MAX_SEED_DATA_SIZE = 8192; // Max bytes per seed +constexpr uint32_t MAX_SEQUENCE_LENGTH = 32; // Max transactions per sequence +constexpr uint32_t CORPUS_BUCKET_COUNT = 256; // Hash buckets for dedup +constexpr uint32_t MIN_CORPUS_ENTRIES = 64; // Minimum seeds to maintain + +// Energy assignment for seed scheduling +constexpr uint32_t ENERGY_BASE = 100; +constexpr uint32_t ENERGY_NEW_COVERAGE = 500; 
+constexpr uint32_t ENERGY_NEW_BUG = 1000; +constexpr uint32_t ENERGY_DECAY_FACTOR = 2; +constexpr uint32_t ENERGY_MIN = 10; + +// ============================================================================ +// Seed Entry +// ============================================================================ + +struct seed_data_t { + uint8_t* data; // Raw calldata bytes + uint32_t length; // Data length + uint32_t capacity; // Allocated capacity +}; + +struct seed_metadata_t { + uint64_t id; // Unique seed ID + uint64_t parent_id; // Parent seed (0 if from initial corpus) + uint64_t timestamp; // When this seed was added + uint32_t generation; // Mutation generation count + + // Coverage information + uint32_t unique_edges; // Edges this seed covers + uint32_t unique_branches; // Branches this seed covers + uint32_t coverage_hash; // Hash of coverage bitmap for dedup + float coverage_contribution; // How much new coverage this seed added + + // Quality metrics + uint32_t execution_count; // How many times this seed was used + uint32_t mutation_count; // How many mutants were derived + uint32_t child_count; // How many children added to corpus + uint32_t bug_count; // Bugs found from this seed + + // Scheduling + uint32_t energy; // Current energy for scheduling + uint32_t priority; // Priority score (higher = more likely to pick) + uint32_t last_selected; // Timestamp of last selection + + // Minimization + bool minimized; // Whether this seed has been minimized + uint32_t original_length; // Length before minimization +}; + +struct seed_entry_t { + seed_data_t data; + seed_metadata_t metadata; + + // For sequence seeds + uint32_t num_transactions; + uint32_t tx_offsets[MAX_SEQUENCE_LENGTH]; // Offset of each tx in data + uint32_t tx_lengths[MAX_SEQUENCE_LENGTH]; // Length of each tx + + // Transaction context + evm_word_t senders[MAX_SEQUENCE_LENGTH]; + evm_word_t values[MAX_SEQUENCE_LENGTH]; + evm_word_t receivers[MAX_SEQUENCE_LENGTH]; + + // Block context for sequence 
+ evm_word_t block_number; + evm_word_t timestamp; + + __host__ __device__ void init(); + __host__ __device__ void copy_from(const seed_entry_t& other); + __host__ __device__ void set_transaction(uint32_t tx_idx, const uint8_t* calldata, + uint32_t len, const evm_word_t& sender, + const evm_word_t& value); +}; + +// ============================================================================ +// Corpus Statistics +// ============================================================================ + +struct corpus_stats_t { + uint64_t total_seeds_added; + uint64_t total_seeds_removed; + uint64_t total_executions; + uint64_t total_mutations; + uint64_t total_new_coverage; + uint64_t total_bugs_found; + + uint32_t current_size; + uint32_t unique_coverage_edges; + uint32_t unique_coverage_branches; + float overall_coverage_percent; + + uint64_t last_new_coverage_time; + uint64_t last_bug_time; + uint32_t cycles_since_progress; + + // Per-category counts + uint32_t initial_seeds; + uint32_t mutant_seeds; + uint32_t splice_seeds; + uint32_t minimized_seeds; + + __host__ __device__ void init(); + __host__ __device__ void update_coverage(uint32_t new_edges, uint32_t new_branches); + __host__ __device__ void record_new_seed(bool from_mutation, bool caused_new_coverage); +}; + +// ============================================================================ +// Corpus Hash Table (for deduplication) +// ============================================================================ + +struct corpus_bucket_t { + uint32_t seed_indices[16]; // Indices of seeds in this bucket + uint32_t count; +}; + +struct corpus_hash_table_t { + corpus_bucket_t buckets[CORPUS_BUCKET_COUNT]; + + __host__ __device__ void init(); + __host__ __device__ bool contains(uint32_t coverage_hash); + __host__ __device__ void insert(uint32_t coverage_hash, uint32_t seed_idx); + __host__ __device__ void remove(uint32_t coverage_hash, uint32_t seed_idx); +}; + +// 
============================================================================ +// GPU Corpus Manager +// ============================================================================ + +class GPUCorpusManager { +public: + __host__ GPUCorpusManager(uint32_t max_size = MAX_CORPUS_SIZE); + __host__ ~GPUCorpusManager(); + + // Seed management + __host__ __device__ bool add_seed(const seed_entry_t& seed, bool check_duplicate = true); + __host__ __device__ bool add_seed_if_interesting(const seed_entry_t& seed, + const coverage_snapshot_t& coverage, + const bug_storage_t* bugs); + __host__ __device__ void remove_seed(uint32_t idx); + __host__ __device__ seed_entry_t* get_seed(uint32_t idx); + __host__ __device__ uint32_t size() const { return stats_.current_size; } + + // Seed selection for fuzzing + __host__ __device__ seed_entry_t* select_seed(curandState* rng); + __host__ __device__ seed_entry_t* select_weighted(curandState* rng); + __host__ __device__ void update_seed_after_execution(uint32_t idx, bool caused_new_coverage, + bool found_bug); + + // Corpus maintenance + __host__ void cull_corpus(); // Remove low-quality seeds + __host__ void compact_corpus(); // Remove gaps in storage + __host__ void sort_by_priority(); // Sort seeds by priority + __host__ void recalculate_energies(); // Recalculate all seed energies + + // Minimization + __host__ void minimize_seed(uint32_t idx); + __host__ void minimize_all(); + + // Merging (for parallel fuzzing) + __host__ void merge_from(const GPUCorpusManager& other); + + // Import/Export + __host__ void import_seeds(const char* directory); + __host__ void export_seeds(const char* directory); + __host__ void export_interesting_seeds(const char* directory, uint32_t max_seeds); + __host__ void load_checkpoint(const char* filename); + __host__ void save_checkpoint(const char* filename); + + // Coverage integration + __host__ void set_coverage_baseline(const gpu_coverage_map_t* baseline); + __host__ void 
update_coverage_contribution(uint32_t seed_idx, + const coverage_snapshot_t& new_coverage); + + // Statistics + __host__ __device__ corpus_stats_t* get_stats() { return &stats_; } + __host__ void print_stats(); + __host__ void export_stats_json(const char* filename); + +private: + seed_entry_t* seeds_; // GPU-accessible seed array + uint32_t capacity_; + corpus_stats_t stats_; + corpus_hash_table_t hash_table_; + gpu_coverage_map_t* coverage_baseline_; + + // Free list for removed seeds + uint32_t* free_indices_; + uint32_t free_count_; + + // Priority queue for selection + uint32_t* priority_queue_; + uint32_t queue_size_; + + __host__ __device__ uint32_t compute_coverage_hash(const coverage_snapshot_t& coverage); + __host__ __device__ uint32_t compute_seed_hash(const seed_entry_t& seed); + __host__ __device__ float compute_priority(const seed_metadata_t& metadata); + __host__ __device__ uint32_t allocate_slot(); + __host__ __device__ void deallocate_slot(uint32_t idx); +}; + +// ============================================================================ +// Seed Minimizer +// ============================================================================ + +class SeedMinimizer { +public: + __host__ SeedMinimizer(); + + // Delta-debugging based minimization + __host__ bool minimize(seed_entry_t* seed, + bool (*test_fn)(const seed_entry_t*, void*), + void* test_ctx); + + // Minimize transaction sequence + __host__ bool minimize_sequence(seed_entry_t* seed, + bool (*test_fn)(const seed_entry_t*, void*), + void* test_ctx); + + // Minimize individual calldata + __host__ bool minimize_calldata(uint8_t* data, uint32_t* length, + bool (*test_fn)(const uint8_t*, uint32_t, void*), + void* test_ctx); + +private: + // Delta debugging helpers + __host__ bool ddmin(uint8_t* data, uint32_t* length, uint32_t granularity, + bool (*test_fn)(const uint8_t*, uint32_t, void*), + void* test_ctx); +}; + +// ============================================================================ +// 
Corpus Distillation (create minimal corpus) +// ============================================================================ + +class CorpusDistiller { +public: + __host__ CorpusDistiller(GPUCorpusManager* corpus); + + // Create minimal corpus that maintains coverage + __host__ void distill(GPUCorpusManager* output_corpus, + const gpu_coverage_map_t* target_coverage); + + // Greedy set cover algorithm + __host__ void greedy_cover(GPUCorpusManager* output_corpus, + const gpu_coverage_map_t* target_coverage); + +private: + GPUCorpusManager* source_corpus_; +}; + +// ============================================================================ +// Invariant System +// ============================================================================ + +enum class InvariantType : uint8_t { + // Value invariants + STORAGE_EQUALS = 0, + STORAGE_NOT_ZERO = 1, + STORAGE_LESS_THAN = 2, + STORAGE_GREATER_THAN = 3, + STORAGE_IN_RANGE = 4, + + // Balance invariants + BALANCE_MIN = 10, + BALANCE_MAX = 11, + BALANCE_EQUALS = 12, + BALANCE_CONSERVED = 13, + + // Supply invariants (tokens) + TOTAL_SUPPLY_CONSERVED = 20, + TOTAL_SUPPLY_MAX = 21, + + // Access control invariants + OWNER_UNCHANGED = 30, + ADMIN_ONLY = 31, + + // State machine invariants + STATE_VALID = 40, + STATE_TRANSITION_VALID = 41, + + // Relationship invariants + SUM_EQUALS = 50, + RATIO_MAINTAINED = 51, + + // Protocol-specific + AMM_K_CONSERVED = 60, + LENDING_COLLATERAL_RATIO = 61, + ERC4626_ASSET_SHARE_RATIO = 62, + + // Custom + CUSTOM = 100 +}; + +struct invariant_t { + InvariantType type; + uint32_t id; + + // Target storage slots/addresses + evm_word_t target_address; + evm_word_t slot1; + evm_word_t slot2; + + // Expected values + evm_word_t expected_value; + evm_word_t min_value; + evm_word_t max_value; + + // For relationship invariants + evm_word_t addresses[4]; + evm_word_t slots[4]; + uint32_t num_slots; + + // Metadata + char description[128]; + bool enabled; + uint32_t violation_count; + + __host__ 
__device__ void init(); +}; + +struct invariant_result_t { + uint32_t invariant_id; + bool violated; + evm_word_t actual_value; + evm_word_t expected_value; + uint32_t tx_index; + uint64_t timestamp; +}; + +// ============================================================================ +// Invariant Checker +// ============================================================================ + +constexpr uint32_t MAX_INVARIANTS = 256; + +class InvariantChecker { +public: + __host__ __device__ InvariantChecker(); + + // Add invariants + __host__ __device__ uint32_t add_invariant(const invariant_t& inv); + __host__ __device__ void remove_invariant(uint32_t id); + __host__ __device__ void enable_invariant(uint32_t id, bool enabled); + + // Check invariants + __host__ __device__ void check_all(const evm_word_t* storage, + const evm_word_t* balances, + uint32_t tx_index, + invariant_result_t* results, + uint32_t* num_violations); + + __host__ __device__ bool check_single(uint32_t id, + const evm_word_t* storage, + const evm_word_t* balances, + invariant_result_t* result); + + // Pre-built invariant templates + __host__ void add_erc20_invariants(const evm_word_t& token_address); + __host__ void add_erc721_invariants(const evm_word_t& token_address); + __host__ void add_erc4626_invariants(const evm_word_t& vault_address); + __host__ void add_amm_invariants(const evm_word_t& pool_address); + __host__ void add_lending_invariants(const evm_word_t& protocol_address); + + // Import from config + __host__ void load_from_json(const char* filename); + __host__ void save_to_json(const char* filename); + + // Statistics + __host__ __device__ uint32_t get_violation_count(uint32_t id); + __host__ __device__ uint32_t get_total_violations(); + +private: + invariant_t invariants_[MAX_INVARIANTS]; + uint32_t num_invariants_; + + __host__ __device__ bool check_storage_equals(const invariant_t& inv, + const evm_word_t* storage); + __host__ __device__ bool check_storage_range(const invariant_t& 
inv, + const evm_word_t* storage); + __host__ __device__ bool check_balance_conserved(const invariant_t& inv, + const evm_word_t* balances); + __host__ __device__ bool check_sum_equals(const invariant_t& inv, + const evm_word_t* storage); +}; + +// ============================================================================ +// CUDA Kernels +// ============================================================================ + +__global__ void kernel_select_seeds( + seed_entry_t* seeds, + uint32_t num_seeds, + uint32_t* selected_indices, + uint32_t num_to_select, + curandState* rng_states +); + +__global__ void kernel_update_energies( + seed_entry_t* seeds, + uint32_t num_seeds, + float decay_factor +); + +__global__ void kernel_check_invariants( + InvariantChecker* checker, + const evm_word_t* storages, // Storage state per instance + const evm_word_t* balances, // Balance state per instance + uint32_t num_instances, + invariant_result_t* results, + uint32_t* violation_counts +); + +__global__ void kernel_compute_coverage_hashes( + const coverage_snapshot_t* snapshots, + uint32_t num_snapshots, + uint32_t* hashes +); + +// ============================================================================ +// Host Helper Functions +// ============================================================================ + +__host__ GPUCorpusManager* allocate_corpus_manager(uint32_t max_size); +__host__ void free_corpus_manager(GPUCorpusManager* manager); + +__host__ InvariantChecker* allocate_invariant_checker(); +__host__ void free_invariant_checker(InvariantChecker* checker); + +__host__ void generate_initial_corpus(GPUCorpusManager* corpus, + const uint8_t* contract_abi, + uint32_t abi_length); + +} // namespace fuzzing +} // namespace CuEVM + +#endif // _CUEVM_FUZZING_CORPUS_H_ diff --git a/CuEVM/include/CuEVM/fuzzing/coverage.cuh b/CuEVM/include/CuEVM/fuzzing/coverage.cuh new file mode 100644 index 0000000..3dd47e7 --- /dev/null +++ b/CuEVM/include/CuEVM/fuzzing/coverage.cuh @@ 
-0,0 +1,317 @@ +// CuEVM: CUDA Ethereum Virtual Machine implementation +// GPU Coverage Instrumentation for NVIDIA B300 Smart Contract Fuzzing +// SPDX-License-Identifier: MIT + +#ifndef _CUEVM_FUZZING_COVERAGE_H_ +#define _CUEVM_FUZZING_COVERAGE_H_ + +#include +#include +#include + +namespace CuEVM { +namespace fuzzing { + +// Coverage map sizes optimized for B300 (SM 103) +constexpr uint32_t COVERAGE_MAP_SIZE = 65536; // 64KB coverage bitmap +constexpr uint32_t BRANCH_COVERAGE_SIZE = 32768; // 32K branch coverage entries +constexpr uint32_t OPCODE_COVERAGE_SIZE = 256; // All EVM opcodes +constexpr uint32_t STORAGE_COVERAGE_SIZE = 16384; // Storage slot coverage +constexpr uint32_t CALL_COVERAGE_SIZE = 4096; // Call target coverage +constexpr uint32_t PC_COVERAGE_SIZE = 65536; // Program counter coverage +constexpr uint32_t EDGE_COVERAGE_SIZE = 131072; // Edge coverage (pc_from -> pc_to) + +// Coverage hit counter types +using coverage_counter_t = uint8_t; // Saturating counter +using coverage_bitmap_t = uint32_t; // Bitmap word + +// Branch distance quantization for gradient guidance +constexpr uint32_t DISTANCE_BUCKETS = 16; +constexpr uint64_t DISTANCE_THRESHOLDS[DISTANCE_BUCKETS] = { + 0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 4096, 16384, 65536, UINT64_MAX +}; + +/** + * Edge coverage entry tracking source->destination transitions + */ +struct edge_coverage_entry_t { + uint32_t pc_from; + uint32_t pc_to; + uint32_t hit_count; + uint32_t contract_id; +}; + +/** + * Branch coverage entry with distance tracking for gradient-guided fuzzing + */ +struct branch_coverage_entry_t { + uint32_t pc; + uint32_t true_target; + uint32_t false_target; + uint8_t taken_true; + uint8_t taken_false; + uint8_t distance_bucket; // Quantized distance for JUMPI condition + uint64_t min_distance; // Minimum observed distance to flip branch +}; + +/** + * Storage coverage entry for tracking SLOAD/SSTORE patterns + */ +struct storage_coverage_entry_t { + uint32_t pc; + uint32_t 
slot_hash; // Hash of storage slot + uint8_t is_read; + uint8_t is_write; + uint8_t is_warm; + uint8_t value_changed; +}; + +/** + * Call coverage entry for tracking inter-contract calls + */ +struct call_coverage_entry_t { + uint32_t pc; + uint32_t caller_contract_id; + uint32_t callee_address_hash; + uint8_t opcode; // CALL, CALLCODE, DELEGATECALL, STATICCALL + uint8_t success; + uint8_t is_precompile; + uint8_t value_transferred; +}; + +/** + * Opcode execution statistics + */ +struct opcode_stats_t { + uint64_t execution_count; + uint64_t gas_used_total; + uint32_t max_stack_depth; + uint32_t error_count; +}; + +/** + * Per-contract coverage data + */ +struct contract_coverage_t { + uint32_t contract_id; + uint32_t code_size; + uint32_t unique_pcs_hit; + uint32_t unique_branches_hit; + uint32_t unique_edges_hit; + float pc_coverage_percent; + float branch_coverage_percent; + float edge_coverage_percent; +}; + +/** + * GPU Coverage Map - Main coverage tracking structure + * Designed for efficient parallel updates on B300 + */ +struct gpu_coverage_map_t { + // Primary coverage bitmaps (atomically updated) + coverage_counter_t* pc_bitmap; // [PC_COVERAGE_SIZE] + coverage_counter_t* edge_bitmap; // [EDGE_COVERAGE_SIZE] + coverage_counter_t* opcode_counters; // [OPCODE_COVERAGE_SIZE] + + // Detailed coverage tracking + branch_coverage_entry_t* branch_entries; // [BRANCH_COVERAGE_SIZE] + storage_coverage_entry_t* storage_entries; // [STORAGE_COVERAGE_SIZE] + call_coverage_entry_t* call_entries; // [CALL_COVERAGE_SIZE] + + // Statistics + opcode_stats_t* opcode_stats; // [OPCODE_COVERAGE_SIZE] + contract_coverage_t* contract_coverage; // Per-contract stats + + // Counters + uint32_t num_branch_entries; + uint32_t num_storage_entries; + uint32_t num_call_entries; + uint32_t num_contracts; + + // Global statistics + uint64_t total_instructions_executed; + uint64_t total_branches_executed; + uint64_t total_storage_ops; + uint64_t total_calls; + uint64_t total_gas_used; + 
+ // Coverage metrics + uint32_t unique_pcs; + uint32_t unique_edges; + uint32_t unique_branches; + float overall_coverage; + + // Bitmap for quick "new coverage" detection + coverage_bitmap_t* virgin_bits; // [COVERAGE_MAP_SIZE / 32] + + __host__ __device__ void init(); + __host__ __device__ void reset(); + __host__ __device__ void merge(const gpu_coverage_map_t& other); +}; + +/** + * Per-instance coverage state (thread-local during execution) + */ +struct instance_coverage_t { + // Hash-based compact representation for GPU efficiency + uint32_t edge_hashes[256]; // Recent edge hashes + uint32_t edge_hash_idx; + + uint32_t branch_hashes[64]; // Recent branch decisions + uint32_t branch_hash_idx; + + uint32_t storage_hashes[64]; // Recent storage accesses + uint32_t storage_hash_idx; + + // Quick stats for this instance + uint32_t pcs_hit; + uint32_t edges_hit; + uint32_t branches_taken; + uint32_t storage_ops; + uint32_t calls_made; + + // Last PC for edge tracking + uint32_t last_pc; + uint32_t last_opcode; + + __host__ __device__ void init(); + __host__ __device__ void record_pc(uint32_t pc); + __host__ __device__ void record_edge(uint32_t from_pc, uint32_t to_pc); + __host__ __device__ void record_branch(uint32_t pc, bool taken, uint64_t distance); + __host__ __device__ void record_storage(uint32_t pc, uint32_t slot_hash, bool is_write); + __host__ __device__ void record_call(uint32_t pc, uint32_t target_hash, uint8_t opcode, bool success); +}; + +/** + * Coverage instrumentation hooks for EVM execution + */ +class CoverageInstrumentation { +public: + __host__ __device__ CoverageInstrumentation(gpu_coverage_map_t* global_map, instance_coverage_t* instance); + + // Pre-execution hooks + __host__ __device__ void on_instruction_start(uint32_t pc, uint8_t opcode); + + // Post-execution hooks + __host__ __device__ void on_instruction_end(uint32_t pc, uint8_t opcode, uint32_t error_code); + + // Branch coverage + __host__ __device__ void on_jump(uint32_t from_pc, 
uint32_t to_pc); + __host__ __device__ void on_jumpi(uint32_t pc, uint32_t target, bool taken, + const evm_word_t& condition); + + // Storage coverage + __host__ __device__ void on_sload(uint32_t pc, const evm_word_t& slot, bool warm); + __host__ __device__ void on_sstore(uint32_t pc, const evm_word_t& slot, + const evm_word_t& old_value, const evm_word_t& new_value); + + // Call coverage + __host__ __device__ void on_call(uint32_t pc, uint8_t opcode, const evm_word_t& target, + const evm_word_t& value, bool success); + + // Memory coverage + __host__ __device__ void on_memory_access(uint32_t pc, uint32_t offset, uint32_t size, bool is_write); + + // Comparison coverage (for gradient-guided fuzzing) + __host__ __device__ void on_comparison(uint32_t pc, uint8_t opcode, + const evm_word_t& a, const evm_word_t& b, + const evm_word_t& result); + + // Return/revert coverage + __host__ __device__ void on_return(uint32_t pc, bool success, uint32_t return_size); + + // Merge instance coverage to global + __host__ __device__ void finalize(); + +private: + gpu_coverage_map_t* global_map_; + instance_coverage_t* instance_; + + __host__ __device__ uint32_t hash_edge(uint32_t from, uint32_t to); + __host__ __device__ uint32_t hash_slot(const evm_word_t& slot); + __host__ __device__ uint8_t quantize_distance(uint64_t distance); + __host__ __device__ uint64_t compute_branch_distance(const evm_word_t& condition); +}; + +/** + * Coverage map allocator for B300 + */ +class CoverageMapAllocator { +public: + __host__ static gpu_coverage_map_t* allocate_global(uint32_t num_contracts = 1); + __host__ static instance_coverage_t* allocate_instances(uint32_t num_instances); + __host__ static void free_global(gpu_coverage_map_t* map); + __host__ static void free_instances(instance_coverage_t* instances); + + // Pinned memory for efficient host-device transfer + __host__ static gpu_coverage_map_t* allocate_pinned(); + __host__ static void copy_to_host(gpu_coverage_map_t* host_map, const 
gpu_coverage_map_t* device_map); +}; + +/** + * Coverage serialization for corpus management + */ +struct coverage_snapshot_t { + // Bitmap data pointers for serialization + uint8_t* pc_bitmap_data; + uint32_t pc_bitmap_size; + uint8_t* edge_bitmap_data; + uint32_t edge_bitmap_size; + + // Compact bitmap for quick coverage comparison (as uint32_t words) + uint32_t edge_bitmap[COVERAGE_MAP_SIZE / 32]; + + // Statistics + uint32_t unique_pcs; + uint32_t unique_edges; + uint32_t unique_branches; + float coverage_score; + uint64_t timestamp; + + __host__ void serialize(void* buffer, size_t* size); + __host__ static coverage_snapshot_t deserialize(const void* buffer, size_t size); + __host__ bool has_new_coverage(const coverage_snapshot_t& baseline); + __host__ float novelty_score(const coverage_snapshot_t& baseline); +}; + +/** + * AFL-style coverage bitmap operations + */ +namespace bitmap_ops { + __host__ __device__ uint32_t hash_pc(uint32_t pc, uint32_t prev_pc); + __host__ __device__ void increment_counter(coverage_counter_t* bitmap, uint32_t index); + __host__ __device__ bool check_virgin(coverage_bitmap_t* virgin, uint32_t index); + __host__ __device__ void mark_virgin(coverage_bitmap_t* virgin, uint32_t index); + __host__ uint32_t count_bits(const coverage_counter_t* bitmap, uint32_t size); + __host__ uint32_t count_nonzero(const coverage_counter_t* bitmap, uint32_t size); + __host__ void merge_bitmaps(coverage_counter_t* dst, const coverage_counter_t* src, uint32_t size); + __host__ bool has_new_bits(const coverage_counter_t* current, const coverage_counter_t* virgin, uint32_t size); +} + +// CUDA kernel for batch coverage merging +__global__ void kernel_merge_coverage( + gpu_coverage_map_t* global_map, + instance_coverage_t* instances, + uint32_t num_instances +); + +// CUDA kernel for computing coverage statistics +__global__ void kernel_compute_coverage_stats( + gpu_coverage_map_t* map, + uint32_t* unique_pcs, + uint32_t* unique_edges, + float* 
coverage_score +); + +// CUDA kernel for virgin bits detection +__global__ void kernel_detect_new_coverage( + gpu_coverage_map_t* current, + gpu_coverage_map_t* baseline, + uint32_t* new_coverage_flags, + uint32_t num_instances +); + +} // namespace fuzzing +} // namespace CuEVM + +#endif // _CUEVM_FUZZING_COVERAGE_H_ diff --git a/CuEVM/include/CuEVM/fuzzing/gpu_fuzzer.cuh b/CuEVM/include/CuEVM/fuzzing/gpu_fuzzer.cuh new file mode 100644 index 0000000..8f36b47 --- /dev/null +++ b/CuEVM/include/CuEVM/fuzzing/gpu_fuzzer.cuh @@ -0,0 +1,472 @@ +// CuEVM: CUDA Ethereum Virtual Machine implementation +// GPU Fuzzer Orchestrator for NVIDIA B300 Smart Contract Fuzzing +// SPDX-License-Identifier: MIT + +#ifndef _CUEVM_GPU_FUZZER_H_ +#define _CUEVM_GPU_FUZZER_H_ + +#include +#include +#include + +#include +#include +#include +#include + +namespace CuEVM { +namespace fuzzing { + +// ============================================================================ +// B300 Optimization Constants +// ============================================================================ + +// B300 GPU specifications (SM 103, Blackwell architecture) +constexpr uint32_t B300_SM_COUNT = 192; // Streaming multiprocessors +constexpr uint32_t B300_CUDA_CORES = 24576; // Total CUDA cores +constexpr uint32_t B300_MEMORY_GB = 192; // HBM3e memory +constexpr uint32_t B300_MEMORY_BANDWIDTH_TB = 8; // Memory bandwidth TB/s +constexpr uint32_t B300_L2_CACHE_MB = 128; // L2 cache size + +// Optimal batch sizes for B300 +constexpr uint32_t DEFAULT_BATCH_SIZE = 65536; // Default instances per batch +constexpr uint32_t MIN_BATCH_SIZE = 1024; +constexpr uint32_t MAX_BATCH_SIZE = 524288; // 512K max + +// Thread configuration for B300 +constexpr uint32_t THREADS_PER_BLOCK = 256; +constexpr uint32_t WARPS_PER_SM = 64; + +// Memory pool sizes +constexpr size_t INPUT_POOL_SIZE = 512 * 1024 * 1024; // 512MB for inputs +constexpr size_t STATE_POOL_SIZE = 1024 * 1024 * 1024; // 1GB for state +constexpr size_t 
TRACE_POOL_SIZE = 256 * 1024 * 1024; // 256MB for traces + +// ============================================================================ +// Fuzzer Configuration +// ============================================================================ + +struct fuzzer_config_t { + // Batch sizing + uint32_t num_instances; // Instances per batch + uint32_t sequence_length; // Transactions per sequence + bool auto_tune_batch_size; // Enable auto-tuning + + // Mutation configuration + uint32_t mutations_per_seed; // Mutations per selected seed + uint32_t havoc_iterations; // Havoc mutation depth + bool abi_aware_mutation; // Enable ABI-aware mutation + bool dictionary_mutation; // Enable dictionary-based mutation + + // Coverage configuration + bool track_edge_coverage; + bool track_branch_coverage; + bool track_storage_coverage; + bool gradient_guided; // Enable gradient-guided fuzzing + + // Oracle configuration + oracle_config_t oracle_config; + + // Corpus configuration + uint32_t max_corpus_size; + uint32_t min_corpus_size; + bool minimize_seeds; + uint32_t cull_interval; // Cull corpus every N iterations + + // Scheduling + uint32_t seed_schedule; // 0=random, 1=weighted, 2=round-robin + uint32_t energy_decay_iterations; + + // Reporting + uint32_t stats_interval; // Print stats every N iterations + uint32_t checkpoint_interval; // Save checkpoint every N iterations + bool verbose; + + // Timeouts + uint32_t max_iterations; // 0 = unlimited + uint32_t max_time_seconds; // 0 = unlimited + uint32_t stall_threshold; // Stop if no progress for N iterations + + // GPU configuration + int gpu_device_id; + bool use_pinned_memory; + bool use_unified_memory; + + __host__ void set_default(); + __host__ void set_for_b300(); // Optimized settings for B300 + __host__ void load_from_json(const char* filename); + __host__ void save_to_json(const char* filename); +}; + +// ============================================================================ +// Fuzzer Statistics +// 
============================================================================ + +struct fuzzer_stats_t { + // Execution counts + uint64_t total_iterations; + uint64_t total_executions; // Total EVM executions + uint64_t total_transactions; // Total transactions executed + + // Coverage metrics + uint32_t unique_edges; + uint32_t unique_branches; + uint32_t unique_pcs; + float edge_coverage_percent; + float branch_coverage_percent; + + // Bug metrics + uint32_t total_bugs_found; + uint32_t unique_bugs; + uint32_t critical_bugs; + uint32_t high_bugs; + uint32_t medium_bugs; + uint32_t low_bugs; + + // Corpus metrics + uint32_t corpus_size; + uint32_t seeds_added; + uint32_t seeds_removed; + uint32_t interesting_seeds; + + // Performance metrics + double total_time_seconds; + double executions_per_second; + double transactions_per_second; + double gpu_utilization; + double memory_usage_gb; + + // Timing breakdown + double mutation_time_percent; + double execution_time_percent; + double coverage_time_percent; + double oracle_time_percent; + + // Progress tracking + uint64_t last_new_coverage_iter; + uint64_t last_bug_iter; + uint32_t iterations_since_progress; + + __host__ void init(); + __host__ void update(const corpus_stats_t& corpus_stats, + const bug_storage_t& bugs, + const gpu_coverage_map_t& coverage); + __host__ void print(); + __host__ void print_summary(); + __host__ void export_json(const char* filename); +}; + +// ============================================================================ +// B300 Batch Optimizer +// ============================================================================ + +class B300BatchOptimizer { +public: + __host__ B300BatchOptimizer(); + + // Auto-tune batch size for optimal throughput + __host__ uint32_t optimize_batch_size(uint32_t current_batch_size, + double current_throughput, + double gpu_utilization); + + // Compute optimal configuration + __host__ void compute_optimal_config(uint32_t contract_size, + uint32_t 
avg_tx_size, + fuzzer_config_t* config); + + // Memory estimation + __host__ size_t estimate_memory_usage(uint32_t batch_size, + uint32_t sequence_length, + uint32_t avg_tx_size); + + // Profiling + __host__ void start_profiling(); + __host__ void end_profiling(); + __host__ void record_iteration(double iteration_time, uint32_t batch_size); + __host__ void print_profile_stats(); + +private: + // Historical data for optimization + double throughput_history_[64]; + uint32_t batch_size_history_[64]; + uint32_t history_idx_; + uint32_t history_count_; + + // Profiling + bool profiling_enabled_; + std::chrono::high_resolution_clock::time_point profile_start_; + double total_profile_time_; + uint64_t total_profile_executions_; +}; + +// ============================================================================ +// GPU Memory Pool Manager +// ============================================================================ + +class GPUMemoryPool { +public: + __host__ GPUMemoryPool(size_t input_pool_size = INPUT_POOL_SIZE, + size_t state_pool_size = STATE_POOL_SIZE, + size_t trace_pool_size = TRACE_POOL_SIZE); + __host__ ~GPUMemoryPool(); + + // Allocate from pools + __host__ void* allocate_input(size_t size); + __host__ void* allocate_state(size_t size); + __host__ void* allocate_trace(size_t size); + + // Free back to pools + __host__ void free_input(void* ptr); + __host__ void free_state(void* ptr); + __host__ void free_trace(void* ptr); + + // Reset pools (for new batch) + __host__ void reset_input_pool(); + __host__ void reset_trace_pool(); + + // Statistics + __host__ size_t get_input_pool_used(); + __host__ size_t get_state_pool_used(); + __host__ size_t get_trace_pool_used(); + +private: + uint8_t* input_pool_; + uint8_t* state_pool_; + uint8_t* trace_pool_; + size_t input_pool_size_; + size_t state_pool_size_; + size_t trace_pool_size_; + size_t input_pool_offset_; + size_t state_pool_offset_; + size_t trace_pool_offset_; +}; + +// 
============================================================================ +// Execution Batch +// ============================================================================ + +struct execution_batch_t { + // Inputs + mutation_input_t* inputs; // [num_instances] + sequence_t* sequences; // [num_instances] (if sequence mode) + + // Instance coverage tracking + instance_coverage_t* coverage; // [num_instances] + + // State trackers for oracles + execution_state_tracker_t* trackers;// [num_instances] + + // Results + bool* execution_success; // [num_instances] + uint8_t* return_data; // [num_instances * MAX_RETURN_SIZE] + uint32_t* return_sizes; // [num_instances] + uint64_t* gas_used; // [num_instances] + + // Batch metadata + uint32_t num_instances; + uint32_t sequence_length; + bool is_sequence_mode; + + __host__ void allocate(uint32_t instances, uint32_t seq_len, bool sequence_mode); + __host__ void free(); + __host__ void reset(); +}; + +// ============================================================================ +// GPU Fuzzer Main Class +// ============================================================================ + +class GPUFuzzer { +public: + __host__ GPUFuzzer(const char* contract_source, + const char* contract_name = nullptr, + const fuzzer_config_t* config = nullptr); + __host__ ~GPUFuzzer(); + + // Initialization + __host__ bool initialize(); + __host__ bool load_contract(const char* bytecode, uint32_t bytecode_len); + __host__ bool load_contract_from_file(const char* filename); + + // Configuration + __host__ void set_config(const fuzzer_config_t& config); + __host__ fuzzer_config_t* get_config() { return &config_; } + + // Invariants + __host__ void add_invariant(const invariant_t& inv); + __host__ void load_invariants(const char* filename); + + // Initial corpus + __host__ void add_seed(const uint8_t* calldata, uint32_t len); + __host__ void add_sequence_seed(const sequence_t& seq); + __host__ void load_initial_corpus(const char* directory); 
+ __host__ void generate_initial_seeds(); + + // Main fuzzing loop + __host__ void run(); + __host__ void run_iterations(uint32_t num_iterations); + __host__ void stop(); + + // Single iteration (for fine-grained control) + __host__ void prepare_batch(); + __host__ void execute_batch(); + __host__ void analyze_batch(); + __host__ void update_corpus(); + + // Results + __host__ fuzzer_stats_t* get_stats() { return &stats_; } + __host__ bug_storage_t* get_bugs() { return bugs_; } + __host__ GPUCorpusManager* get_corpus() { return corpus_; } + __host__ gpu_coverage_map_t* get_coverage() { return global_coverage_; } + + // Reporting + __host__ void print_stats(); + __host__ void print_bugs(); + __host__ void export_results(const char* directory); + __host__ void save_checkpoint(const char* filename); + __host__ void load_checkpoint(const char* filename); + + // Callbacks + using progress_callback_t = void(*)(const fuzzer_stats_t*, void*); + using bug_callback_t = void(*)(const detected_bug_t*, void*); + __host__ void set_progress_callback(progress_callback_t cb, void* ctx); + __host__ void set_bug_callback(bug_callback_t cb, void* ctx); + +private: + // Configuration + fuzzer_config_t config_; + char* contract_source_; + char* contract_name_; + uint8_t* contract_bytecode_; + uint32_t bytecode_len_; + + // Core components + GPUMutationEngine* mutation_engine_; + GPUCorpusManager* corpus_; + InvariantChecker* invariant_checker_; + CompositeOracle* oracle_; + B300BatchOptimizer* batch_optimizer_; + GPUMemoryPool* memory_pool_; + + // Coverage tracking + gpu_coverage_map_t* global_coverage_; + coverage_snapshot_t baseline_coverage_; + + // Bug storage + bug_storage_t* bugs_; + + // Execution batch + execution_batch_t batch_; + + // Statistics + fuzzer_stats_t stats_; + std::chrono::high_resolution_clock::time_point start_time_; + + // Control + bool running_; + bool initialized_; + + // Callbacks + progress_callback_t progress_callback_; + void* progress_callback_ctx_; + 
bug_callback_t bug_callback_; + void* bug_callback_ctx_; + + // CUDA streams for overlap + cudaStream_t mutation_stream_; + cudaStream_t execution_stream_; + cudaStream_t analysis_stream_; + + // RNG state + gpu_rng_state_t rng_state_; + + // Internal methods + __host__ void select_seeds_for_batch(); + __host__ void mutate_batch(); + __host__ void execute_evm_batch(); + __host__ void collect_coverage(); + __host__ void check_oracles(); + __host__ void check_invariants(); + __host__ void process_interesting_inputs(); + __host__ void update_statistics(); + __host__ void report_progress(); + __host__ void maybe_cull_corpus(); + __host__ void maybe_checkpoint(); + __host__ bool should_stop(); +}; + +// ============================================================================ +// Convenience Functions +// ============================================================================ + +// Quick fuzz function for simple usage +__host__ fuzzer_stats_t quick_fuzz( + const char* contract_source, + const char* contract_name, + uint32_t num_iterations = 10000, + uint32_t num_instances = DEFAULT_BATCH_SIZE +); + +// Fuzz with custom configuration +__host__ fuzzer_stats_t fuzz_with_config( + const char* contract_source, + const char* contract_name, + const fuzzer_config_t& config +); + +// Multi-contract fuzzing +__host__ void fuzz_multi_contract( + const char** contract_sources, + const char** contract_names, + uint32_t num_contracts, + const fuzzer_config_t& config, + fuzzer_stats_t* combined_stats +); + +// ============================================================================ +// CUDA Kernels +// ============================================================================ + +// Main fuzzing kernel that executes EVM instances +__global__ void kernel_execute_batch( + void* evm_instances, // CuEVM instances + mutation_input_t* inputs, + instance_coverage_t* coverage, + execution_state_tracker_t* trackers, + bool* success, + uint8_t* return_data, + uint32_t* 
return_sizes, + uint64_t* gas_used, + uint32_t num_instances +); + +// Coverage merge kernel +__global__ void kernel_merge_batch_coverage( + instance_coverage_t* instance_coverage, + gpu_coverage_map_t* global_coverage, + uint32_t num_instances, + uint32_t* new_coverage_flags +); + +// Oracle checking kernel +__global__ void kernel_run_oracles( + CompositeOracle* oracle, + execution_state_tracker_t* trackers, + uint32_t num_instances, + bug_storage_t* bugs +); + +// Corpus selection kernel +__global__ void kernel_weighted_selection( + seed_entry_t* seeds, + uint32_t num_seeds, + uint32_t* cumulative_weights, + uint32_t* selected_indices, + uint32_t num_to_select, + curandState* rng +); + +} // namespace fuzzing +} // namespace CuEVM + +#endif // _CUEVM_GPU_FUZZER_H_ diff --git a/CuEVM/include/CuEVM/fuzzing/mutation.cuh b/CuEVM/include/CuEVM/fuzzing/mutation.cuh new file mode 100644 index 0000000..9ac2215 --- /dev/null +++ b/CuEVM/include/CuEVM/fuzzing/mutation.cuh @@ -0,0 +1,458 @@ +// CuEVM: CUDA Ethereum Virtual Machine implementation +// GPU Mutation Engine for NVIDIA B300 Smart Contract Fuzzing +// SPDX-License-Identifier: MIT + +#ifndef _CUEVM_FUZZING_MUTATION_H_ +#define _CUEVM_FUZZING_MUTATION_H_ + +#include +#include +#include +#include + +namespace CuEVM { +namespace fuzzing { + +// ============================================================================ +// Configuration Constants for B300 Optimization +// ============================================================================ + +constexpr uint32_t MAX_MUTATION_SIZE = 4096; // Max bytes to mutate +constexpr uint32_t MAX_DICTIONARY_SIZE = 1024; // Dictionary entries +constexpr uint32_t MAX_INTERESTING_VALUES = 256; // Interesting value pool +constexpr uint32_t MUTATION_STACK_SIZE = 16; // Havoc mutation stack +constexpr uint32_t MAX_SPLICE_LENGTH = 512; // Max splice size +constexpr uint32_t ARITH_MAX_DELTA = 35; // Max arithmetic delta + +// Mutation type weights (0-255 for probability weighting) 
+constexpr uint8_t WEIGHT_BIT_FLIP = 20; +constexpr uint8_t WEIGHT_BYTE_FLIP = 20; +constexpr uint8_t WEIGHT_ARITH_INC = 15; +constexpr uint8_t WEIGHT_ARITH_DEC = 15; +constexpr uint8_t WEIGHT_INTERESTING = 25; +constexpr uint8_t WEIGHT_DICTIONARY = 30; +constexpr uint8_t WEIGHT_HAVOC = 40; +constexpr uint8_t WEIGHT_SPLICE = 15; +constexpr uint8_t WEIGHT_COPY = 10; +constexpr uint8_t WEIGHT_INSERT = 10; +constexpr uint8_t WEIGHT_DELETE = 10; +constexpr uint8_t WEIGHT_OVERWRITE = 15; +constexpr uint8_t WEIGHT_CROSSOVER = 20; + +// ============================================================================ +// Mutation Types +// ============================================================================ + +enum class MutationType : uint8_t { + // Bit-level mutations + FLIP_BIT_1 = 0, + FLIP_BIT_2 = 1, + FLIP_BIT_4 = 2, + + // Byte-level mutations + FLIP_BYTE_1 = 3, + FLIP_BYTE_2 = 4, + FLIP_BYTE_4 = 5, + + // Arithmetic mutations + ARITH_INC_8 = 6, + ARITH_DEC_8 = 7, + ARITH_INC_16 = 8, + ARITH_DEC_16 = 9, + ARITH_INC_32 = 10, + ARITH_DEC_32 = 11, + ARITH_INC_64 = 12, + ARITH_DEC_64 = 13, + + // Interesting value replacements + INTERESTING_8 = 14, + INTERESTING_16 = 15, + INTERESTING_32 = 16, + INTERESTING_64 = 17, + INTERESTING_256 = 18, + + // Dictionary-based + DICT_INSERT = 19, + DICT_OVERWRITE = 20, + + // Structural mutations + CLONE_BYTE = 21, + DELETE_BYTES = 22, + INSERT_BYTES = 23, + OVERWRITE_BYTES = 24, + SWAP_BYTES = 25, + SHUFFLE_BYTES = 26, + + // Havoc (random multi-mutation) + HAVOC_SINGLE = 27, + HAVOC_MULTI = 28, + + // Cross-input mutations + SPLICE = 29, + CROSSOVER = 30, + + // EVM-specific mutations + EVM_ADDRESS = 31, + EVM_UINT256 = 32, + EVM_BYTES32 = 33, + EVM_SELECTOR = 34, + EVM_CALLDATA = 35, + + // Boundary mutations + BOUNDARY_LOW = 36, + BOUNDARY_HIGH = 37, + BOUNDARY_POWER2 = 38, + + // Gradient-guided + GRADIENT_INC = 39, + GRADIENT_DEC = 40, + + NUM_MUTATION_TYPES = 41 +}; + +// 
============================================================================ +// Interesting Values for Smart Contracts +// ============================================================================ + +// Interesting values counts (values defined in mutation.cu) +constexpr uint32_t NUM_INTERESTING_8 = 9; +constexpr uint32_t NUM_INTERESTING_16 = 15; +constexpr uint32_t NUM_INTERESTING_32 = 23; +constexpr uint32_t NUM_INTERESTING_64 = 14; + +// External declarations for device constant memory arrays (defined in mutation.cu) +extern __constant__ int8_t INTERESTING_8_VALUES[NUM_INTERESTING_8]; +extern __constant__ int16_t INTERESTING_16_VALUES[NUM_INTERESTING_16]; +extern __constant__ int32_t INTERESTING_32_VALUES[NUM_INTERESTING_32]; +extern __constant__ int64_t INTERESTING_64_VALUES[NUM_INTERESTING_64]; + +// EVM-specific interesting values +struct evm_interesting_t { + evm_word_t value; + const char* description; +}; + +// ============================================================================ +// Dictionary Entry for Smart Contract Fuzzing +// ============================================================================ + +struct dictionary_entry_t { + uint8_t data[64]; // Entry data (max 64 bytes) + uint8_t length; // Actual length + uint8_t entry_type; // Type: address, selector, value, etc. 
+ uint16_t hit_count; // How often this produced new coverage + uint32_t source_pc; // Where this value was observed +}; + +enum class DictionaryEntryType : uint8_t { + ADDRESS = 0, + FUNCTION_SELECTOR = 1, + UINT256_VALUE = 2, + BYTES32_VALUE = 3, + STRING_VALUE = 4, + ARRAY_LENGTH = 5, + STORAGE_SLOT = 6, + BLOCK_VALUE = 7, + COMPARISON_OPERAND = 8, + MAGIC_CONSTANT = 9 +}; + +// ============================================================================ +// Mutation Dictionary +// ============================================================================ + +struct mutation_dictionary_t { + dictionary_entry_t entries[MAX_DICTIONARY_SIZE]; + uint32_t num_entries; + uint32_t next_insert_idx; + + // Type-specific indices for efficient lookup + uint16_t address_indices[256]; + uint16_t selector_indices[256]; + uint16_t value_indices[256]; + uint16_t num_addresses; + uint16_t num_selectors; + uint16_t num_values; + + __host__ __device__ void init(); + __host__ __device__ bool add_entry(const uint8_t* data, uint8_t length, DictionaryEntryType type, uint32_t pc); + __host__ __device__ const dictionary_entry_t* get_random(curandState* rng, DictionaryEntryType type = (DictionaryEntryType)255); + __host__ __device__ void update_hit_count(uint32_t idx); +}; + +// ============================================================================ +// Input Representation for Mutation +// ============================================================================ + +struct mutation_input_t { + uint8_t* data; // Raw input bytes + uint32_t length; // Current length + uint32_t capacity; // Max allocated size + + // EVM-specific parsed structure + uint8_t selector[4]; // Function selector + uint32_t num_params; // Number of ABI parameters + uint32_t param_offsets[32]; // Offset of each parameter + uint8_t param_types[32]; // Type of each parameter + + // Transaction context + evm_word_t value; // msg.value + evm_word_t gas_limit; // Gas limit + evm_word_t sender; // msg.sender + 
evm_word_t receiver; // Target address + + // Block context + evm_word_t block_number; + evm_word_t timestamp; + evm_word_t basefee; + evm_word_t prevrandao; + + __host__ __device__ void init(uint32_t max_size); + __host__ __device__ void copy_from(const mutation_input_t& other); + __host__ __device__ void parse_abi(); + __host__ __device__ void reserialize_abi(); +}; + +// ============================================================================ +// Mutation Result +// ============================================================================ + +struct mutation_result_t { + MutationType type; + uint32_t offset; + uint32_t length; + int32_t size_delta; // Change in input size + bool success; + uint32_t mutation_id; // For tracking/replay +}; + +// ============================================================================ +// GPU Random Number Generator State +// ============================================================================ + +struct gpu_rng_state_t { + curandState* states; // Per-thread RNG states + uint32_t num_states; + + __host__ void init(uint32_t num_threads, uint64_t seed); + __host__ void free(); +}; + +// ============================================================================ +// GPU Mutation Engine +// ============================================================================ + +class GPUMutationEngine { +public: + __host__ GPUMutationEngine(uint32_t num_instances, uint64_t seed = 0); + __host__ ~GPUMutationEngine(); + + // Single mutation operations + __device__ mutation_result_t mutate(mutation_input_t* input, curandState* rng); + __device__ mutation_result_t mutate_typed(mutation_input_t* input, MutationType type, curandState* rng); + + // Batch mutations + __host__ void mutate_batch(mutation_input_t* inputs, uint32_t num_inputs, + uint32_t mutations_per_input, cudaStream_t stream = 0); + + // Havoc mutation (multiple random mutations) + __device__ void havoc(mutation_input_t* input, curandState* rng, uint32_t 
num_mutations); + + // Splice two inputs + __device__ void splice(mutation_input_t* dst, const mutation_input_t* src1, + const mutation_input_t* src2, curandState* rng); + + // Crossover two inputs + __device__ void crossover(mutation_input_t* dst, const mutation_input_t* src1, + const mutation_input_t* src2, curandState* rng); + + // EVM-specific mutations + __device__ void mutate_address(mutation_input_t* input, uint32_t offset, curandState* rng); + __device__ void mutate_uint256(mutation_input_t* input, uint32_t offset, curandState* rng); + __device__ void mutate_selector(mutation_input_t* input, curandState* rng); + __device__ void mutate_calldata(mutation_input_t* input, curandState* rng); + __device__ void mutate_value(mutation_input_t* input, curandState* rng); + __device__ void mutate_gas(mutation_input_t* input, curandState* rng); + __device__ void mutate_sender(mutation_input_t* input, curandState* rng); + __device__ void mutate_block_context(mutation_input_t* input, curandState* rng); + + // Dictionary operations + __host__ __device__ void add_to_dictionary(const uint8_t* data, uint8_t length, + DictionaryEntryType type, uint32_t pc); + __device__ void apply_dictionary(mutation_input_t* input, curandState* rng); + + // Gradient-guided mutation + __device__ void gradient_mutate(mutation_input_t* input, uint32_t target_offset, + bool increase, curandState* rng); + + // Configuration + __host__ void set_mutation_weights(const uint8_t* weights); + __host__ void set_max_mutations(uint32_t max); + __host__ void enable_abi_aware(bool enable); + + // Get dictionary + __host__ __device__ mutation_dictionary_t* get_dictionary() { return dictionary_; } + +private: + gpu_rng_state_t rng_state_; + mutation_dictionary_t* dictionary_; + uint8_t mutation_weights_[64]; + uint32_t max_mutations_; + bool abi_aware_; + + // Internal mutation implementations + __device__ void flip_bit(uint8_t* data, uint32_t length, uint32_t offset, uint8_t width); + __device__ void 
flip_byte(uint8_t* data, uint32_t length, uint32_t offset, uint8_t width); + __device__ void arith_mutation(uint8_t* data, uint32_t length, uint32_t offset, uint8_t width, bool increment, int32_t delta); + __device__ void interesting_mutation(uint8_t* data, uint32_t length, uint32_t offset, uint8_t width, curandState* rng); + __device__ void clone_bytes(mutation_input_t* input, uint32_t src_offset, uint32_t dst_offset, uint32_t count); + __device__ void delete_bytes(mutation_input_t* input, uint32_t offset, uint32_t count); + __device__ void insert_bytes(mutation_input_t* input, uint32_t offset, const uint8_t* data, uint32_t count); + __device__ void overwrite_bytes(mutation_input_t* input, uint32_t offset, const uint8_t* data, uint32_t count); + __device__ void swap_bytes(uint8_t* data, uint32_t offset1, uint32_t offset2, uint32_t count); + __device__ void shuffle_bytes(uint8_t* data, uint32_t offset, uint32_t count, curandState* rng); + + __device__ MutationType select_mutation_type(curandState* rng); + __device__ uint32_t select_offset(uint32_t length, curandState* rng); +}; + +// ============================================================================ +// Sequence Mutation (for multi-transaction fuzzing) +// ============================================================================ + +struct transaction_t { + mutation_input_t input; + uint32_t sequence_id; + uint32_t tx_index; + bool is_deploy; // CREATE/CREATE2 +}; + +struct sequence_t { + transaction_t* transactions; + uint32_t num_transactions; + uint32_t capacity; + uint64_t seed; // For deterministic replay + + __host__ __device__ void init(uint32_t max_txs); + __host__ __device__ void add_transaction(const transaction_t& tx); + __host__ __device__ void remove_transaction(uint32_t index); + __host__ __device__ void reorder(uint32_t from, uint32_t to); + __host__ __device__ void copy_from(const sequence_t& other); +}; + +class SequenceMutator { +public: + __host__ SequenceMutator(GPUMutationEngine* 
engine); + + // Sequence-level mutations + __device__ void mutate_sequence(sequence_t* seq, curandState* rng); + __device__ void insert_transaction(sequence_t* seq, uint32_t index, curandState* rng); + __device__ void delete_transaction(sequence_t* seq, uint32_t index); + __device__ void duplicate_transaction(sequence_t* seq, uint32_t index); + __device__ void swap_transactions(sequence_t* seq, uint32_t idx1, uint32_t idx2); + __device__ void splice_sequences(sequence_t* dst, const sequence_t* src1, const sequence_t* src2, curandState* rng); + + // Mutate individual transaction in sequence + __device__ void mutate_transaction(sequence_t* seq, uint32_t tx_index, curandState* rng); + + // Mutate sender pattern across sequence + __device__ void mutate_sender_pattern(sequence_t* seq, curandState* rng); + + // Mutate value flow across sequence + __device__ void mutate_value_flow(sequence_t* seq, curandState* rng); + +private: + GPUMutationEngine* engine_; +}; + +// ============================================================================ +// ABI-Aware Mutation Helpers +// ============================================================================ + +namespace abi { + +// ABI type codes +enum class ABIType : uint8_t { + UINT8 = 0, UINT16 = 1, UINT32 = 2, UINT64 = 3, UINT128 = 4, UINT256 = 5, + INT8 = 6, INT16 = 7, INT32 = 8, INT64 = 9, INT128 = 10, INT256 = 11, + ADDRESS = 12, + BOOL = 13, + BYTES1 = 14, BYTES2 = 15, BYTES4 = 16, BYTES8 = 17, BYTES16 = 18, BYTES32 = 19, + BYTES_DYN = 20, + STRING = 21, + ARRAY_FIXED = 22, + ARRAY_DYN = 23, + TUPLE = 24, + FUNCTION = 25 +}; + +__device__ ABIType detect_param_type(const uint8_t* data, uint32_t offset, uint32_t length); +__device__ uint32_t get_type_size(ABIType type); +__device__ void mutate_by_type(uint8_t* data, uint32_t offset, ABIType type, curandState* rng); +__device__ void generate_by_type(uint8_t* data, uint32_t offset, ABIType type, curandState* rng); + +// Parse function selector to get expected parameter 
types +__device__ bool lookup_selector(const uint8_t* selector, ABIType* param_types, uint32_t* num_params); + +} // namespace abi + +// ============================================================================ +// CUDA Kernels +// ============================================================================ + +// Kernel to initialize RNG states +__global__ void kernel_init_rng(curandState* states, uint32_t num_states, uint64_t seed); + +// Kernel to mutate a batch of inputs +__global__ void kernel_mutate_batch( + GPUMutationEngine* engine, + mutation_input_t* inputs, + uint32_t num_inputs, + uint32_t mutations_per_input, + curandState* rng_states, + mutation_result_t* results +); + +// Kernel to perform havoc mutation +__global__ void kernel_havoc_batch( + GPUMutationEngine* engine, + mutation_input_t* inputs, + uint32_t num_inputs, + uint32_t havoc_iterations, + curandState* rng_states +); + +// Kernel to splice inputs pairwise +__global__ void kernel_splice_batch( + GPUMutationEngine* engine, + mutation_input_t* dst, + const mutation_input_t* src1, + const mutation_input_t* src2, + uint32_t num_pairs, + curandState* rng_states +); + +// Kernel to mutate sequences +__global__ void kernel_mutate_sequences( + SequenceMutator* mutator, + sequence_t* sequences, + uint32_t num_sequences, + curandState* rng_states +); + +// ============================================================================ +// Host Helper Functions +// ============================================================================ + +__host__ void allocate_mutation_inputs(mutation_input_t** inputs, uint32_t num_inputs, uint32_t max_size); +__host__ void free_mutation_inputs(mutation_input_t* inputs, uint32_t num_inputs); +__host__ void copy_inputs_to_device(mutation_input_t* d_inputs, const mutation_input_t* h_inputs, uint32_t num_inputs); +__host__ void copy_inputs_to_host(mutation_input_t* h_inputs, const mutation_input_t* d_inputs, uint32_t num_inputs); + +__host__ void 
allocate_sequences(sequence_t** sequences, uint32_t num_sequences, uint32_t max_txs); +__host__ void free_sequences(sequence_t* sequences, uint32_t num_sequences); + +} // namespace fuzzing +} // namespace CuEVM + +#endif // _CUEVM_FUZZING_MUTATION_H_ diff --git a/CuEVM/include/CuEVM/fuzzing/oracle.cuh b/CuEVM/include/CuEVM/fuzzing/oracle.cuh new file mode 100644 index 0000000..433e974 --- /dev/null +++ b/CuEVM/include/CuEVM/fuzzing/oracle.cuh @@ -0,0 +1,600 @@ +// CuEVM: CUDA Ethereum Virtual Machine implementation +// Comprehensive Oracle and Bug Detection for Smart Contract Fuzzing +// SPDX-License-Identifier: MIT + +#ifndef _CUEVM_FUZZING_ORACLE_H_ +#define _CUEVM_FUZZING_ORACLE_H_ + +#include +#include +#include +#include + +namespace CuEVM { +namespace fuzzing { + +// ============================================================================ +// Bug Types and Severity Levels +// ============================================================================ + +enum class BugType : uint8_t { + // Arithmetic vulnerabilities + INTEGER_OVERFLOW = 0, + INTEGER_UNDERFLOW = 1, + DIVISION_BY_ZERO = 2, + MODULO_BY_ZERO = 3, + EXPONENT_OVERFLOW = 4, + + // Access control vulnerabilities + UNAUTHORIZED_CALL = 10, + UNAUTHORIZED_SELFDESTRUCT = 11, + UNAUTHORIZED_DELEGATECALL = 12, + TX_ORIGIN_AUTH = 13, + MISSING_ACCESS_CONTROL = 14, + + // Reentrancy vulnerabilities + REENTRANCY_ETH = 20, + REENTRANCY_ERC20 = 21, + REENTRANCY_CROSS_FUNCTION = 22, + REENTRANCY_CROSS_CONTRACT = 23, + READ_ONLY_REENTRANCY = 24, + + // State manipulation + UNINITIALIZED_STORAGE = 30, + STORAGE_COLLISION = 31, + DIRTY_HIGH_BITS = 32, + UNCHECKED_RETURN = 33, + + // Token vulnerabilities + ERC20_APPROVAL_RACE = 40, + ERC20_TRANSFER_TO_ZERO = 41, + ERC20_BURN_WITHOUT_APPROVAL = 42, + ERC721_UNAUTHORIZED_TRANSFER = 43, + TOKEN_BALANCE_MANIPULATION = 44, + + // Oracle/price manipulation + ORACLE_MANIPULATION = 50, + FLASHLOAN_ATTACK = 51, + SANDWICH_VULNERABLE = 52, + SLIPPAGE_VULNERABILITY = 53, 
+ + // Gas vulnerabilities + BLOCK_GAS_LIMIT = 60, + UNBOUNDED_LOOP = 61, + GAS_GRIEFING = 62, + OUT_OF_GAS_CALL = 63, + + // Fund safety + ETHER_LEAK = 70, + STUCK_ETHER = 71, + UNEXPECTED_ETH_BALANCE = 72, + FORCE_FEED_VULNERABLE = 73, + SELFDESTRUCT_ETH_LEAK = 74, + + // Logic bugs + ASSERTION_VIOLATION = 80, + INVARIANT_VIOLATION = 81, + STATE_INCONSISTENCY = 82, + UNEXPECTED_REVERT = 83, + + // External interaction issues + EXTERNAL_CALL_FAILURE = 90, + UNTRUSTED_EXTERNAL_CALL = 91, + RETURN_DATA_MANIPULATION = 92, + + // Signature/crypto issues + SIGNATURE_REPLAY = 100, + SIGNATURE_MALLEABILITY = 101, + WEAK_RANDOMNESS = 102, + + // Proxy pattern issues + UNINITIALIZED_PROXY = 110, + STORAGE_SLOT_COLLISION = 111, + IMPLEMENTATION_DESTROYED = 112, + + // Custom/unknown + CUSTOM_ORACLE_VIOLATION = 200, + UNKNOWN = 255 +}; + +enum class BugSeverity : uint8_t { + INFORMATIONAL = 0, + LOW = 1, + MEDIUM = 2, + HIGH = 3, + CRITICAL = 4 +}; + +// ============================================================================ +// Bug Detection Result +// ============================================================================ + +struct bug_location_t { + uint32_t pc; // Program counter where bug occurred + uint32_t tx_index; // Transaction index in sequence + uint32_t call_depth; // Call stack depth + uint32_t contract_id; // Contract identifier + uint8_t opcode; // Opcode that triggered the bug +}; + +struct bug_context_t { + evm_word_t operand1; // First operand (for arithmetic bugs) + evm_word_t operand2; // Second operand + evm_word_t result; // Result value + evm_word_t expected; // Expected value (for invariant checks) + evm_word_t caller; // msg.sender + evm_word_t callee; // Call target + evm_word_t value; // msg.value + uint8_t context_data[256]; // Additional context + uint32_t context_length; +}; + +struct detected_bug_t { + BugType type; + BugSeverity severity; + bug_location_t location; + bug_context_t context; + uint64_t timestamp; // When the bug was 
detected + uint64_t input_hash; // Hash of input that triggered the bug + uint32_t sequence_id; // Sequence that triggered the bug + bool confirmed; // Whether bug was confirmed on replay + char description[256]; // Human-readable description +}; + +// ============================================================================ +// Oracle Configuration +// ============================================================================ + +struct oracle_config_t { + // Arithmetic checks + bool check_overflow; + bool check_underflow; + bool check_div_zero; + + // Access control checks + bool check_unauthorized_access; + bool check_tx_origin; + bool check_selfdestruct; + + // Reentrancy checks + bool check_reentrancy; + bool check_cross_function_reentrancy; + bool check_read_only_reentrancy; + + // Token checks + bool check_erc20_issues; + bool check_erc721_issues; + + // Fund safety checks + bool check_ether_leak; + bool check_stuck_ether; + bool check_force_feed; + + // Gas checks + bool check_gas_issues; + + // Severity threshold (only report bugs >= this severity) + BugSeverity min_severity; + + // Maximum bugs to track per type + uint32_t max_bugs_per_type; + + // Deduplication window + uint32_t dedup_window_size; + + __host__ __device__ void set_default(); + __host__ __device__ void enable_all(); + __host__ __device__ void set_minimal(); +}; + +// ============================================================================ +// Bug Storage +// ============================================================================ + +constexpr uint32_t MAX_BUGS_TOTAL = 4096; +constexpr uint32_t MAX_BUGS_PER_TYPE = 256; + +struct bug_storage_t { + detected_bug_t bugs[MAX_BUGS_TOTAL]; + uint32_t bug_count; + + // Deduplication - track recent bug signatures + uint64_t recent_signatures[1024]; + uint32_t signature_idx; + + // Per-type counts + uint32_t type_counts[(uint32_t)BugType::UNKNOWN + 1]; + + __host__ __device__ void init(); + __host__ __device__ bool add_bug(const 
detected_bug_t& bug); + __host__ __device__ bool is_duplicate(uint64_t signature); + __host__ __device__ uint32_t count_by_type(BugType type); + __host__ __device__ uint32_t count_by_severity(BugSeverity severity); + __host__ __device__ void clear(); +}; + +// ============================================================================ +// Execution State Tracker (for reentrancy detection) +// ============================================================================ + +constexpr uint32_t MAX_CALL_DEPTH = 64; +constexpr uint32_t MAX_STORAGE_WRITES = 256; + +struct call_frame_t { + evm_word_t caller; + evm_word_t callee; + evm_word_t value; + uint32_t pc; + uint8_t opcode; // CALL, CALLCODE, DELEGATECALL, STATICCALL + bool has_state_change; // Whether state was modified before call + bool is_external; // Whether call is to external contract +}; + +struct storage_write_t { + evm_word_t address; + evm_word_t slot; + evm_word_t old_value; + evm_word_t new_value; + uint32_t pc; + uint32_t call_depth; +}; + +struct execution_state_tracker_t { + // Call stack + call_frame_t call_stack[MAX_CALL_DEPTH]; + uint32_t call_depth; + + // Storage writes (for reentrancy detection) + storage_write_t storage_writes[MAX_STORAGE_WRITES]; + uint32_t num_storage_writes; + + // Balance tracking + evm_word_t initial_balances[64]; // Track initial balances + evm_word_t current_balances[64]; // Current balances + uint32_t num_tracked_addresses; + + // Reentrancy detection + bool in_external_call; + bool state_modified_before_call; + uint32_t reentrancy_guard_slot; // If we detect a reentrancy guard + + // Gas tracking + uint64_t initial_gas; + uint64_t gas_used; + + // Return value tracking + bool last_call_success; + bool last_call_checked; + + __host__ __device__ void init(); + __host__ __device__ void push_call(const call_frame_t& frame); + __host__ __device__ void pop_call(); + __host__ __device__ void record_storage_write(const storage_write_t& write); + __host__ __device__ bool 
check_reentrancy(); + __host__ __device__ void track_balance(const evm_word_t& address, const evm_word_t& balance); +}; + +// ============================================================================ +// Oracle Detector Base Class +// ============================================================================ + +class OracleDetector { +public: + __host__ __device__ OracleDetector(oracle_config_t* config, bug_storage_t* storage); + + // Pre-execution hooks + __host__ __device__ void on_transaction_start(const evm_word_t& sender, const evm_word_t& receiver, + const evm_word_t& value, const uint8_t* calldata, uint32_t calldata_len); + + // Instruction-level hooks + __host__ __device__ void on_instruction(uint32_t pc, uint8_t opcode, + const evm_word_t* stack, uint32_t stack_size, + execution_state_tracker_t* tracker); + + // Arithmetic operation hooks + __host__ __device__ void check_add(uint32_t pc, const evm_word_t& a, const evm_word_t& b, + const evm_word_t& result); + __host__ __device__ void check_sub(uint32_t pc, const evm_word_t& a, const evm_word_t& b, + const evm_word_t& result); + __host__ __device__ void check_mul(uint32_t pc, const evm_word_t& a, const evm_word_t& b, + const evm_word_t& result); + __host__ __device__ void check_div(uint32_t pc, const evm_word_t& a, const evm_word_t& b); + __host__ __device__ void check_mod(uint32_t pc, const evm_word_t& a, const evm_word_t& b); + __host__ __device__ void check_exp(uint32_t pc, const evm_word_t& base, const evm_word_t& exp, + const evm_word_t& result); + + // Storage hooks + __host__ __device__ void on_sload(uint32_t pc, const evm_word_t& slot, const evm_word_t& value, + execution_state_tracker_t* tracker); + __host__ __device__ void on_sstore(uint32_t pc, const evm_word_t& slot, + const evm_word_t& old_value, const evm_word_t& new_value, + execution_state_tracker_t* tracker); + + // Call hooks + __host__ __device__ void on_call_start(uint32_t pc, uint8_t opcode, + const evm_word_t& target, const 
evm_word_t& value, + const evm_word_t& gas, + execution_state_tracker_t* tracker); + __host__ __device__ void on_call_end(uint32_t pc, bool success, const uint8_t* return_data, + uint32_t return_size, execution_state_tracker_t* tracker); + + // Balance hooks + __host__ __device__ void on_balance_change(const evm_word_t& address, + const evm_word_t& old_balance, const evm_word_t& new_balance); + + // Special instruction hooks + __host__ __device__ void on_selfdestruct(uint32_t pc, const evm_word_t& beneficiary, + const evm_word_t& balance); + __host__ __device__ void on_create(uint32_t pc, const evm_word_t& value, + const evm_word_t& new_address); + __host__ __device__ void on_origin(uint32_t pc); + + // Post-execution hooks + __host__ __device__ void on_transaction_end(bool success, const uint8_t* return_data, + uint32_t return_size, uint64_t gas_used, + execution_state_tracker_t* tracker); + + // Invariant checking + __host__ __device__ void check_custom_invariant(uint32_t invariant_id, bool condition, + const char* description); + + // Get results + __host__ __device__ bug_storage_t* get_bugs() { return storage_; } + __host__ __device__ uint32_t get_bug_count() { return storage_->bug_count; } + +protected: + oracle_config_t* config_; + bug_storage_t* storage_; + uint32_t current_tx_index_; + uint32_t current_sequence_id_; + evm_word_t current_sender_; + evm_word_t current_receiver_; + + __host__ __device__ void report_bug(BugType type, BugSeverity severity, + const bug_location_t& location, + const bug_context_t& context, + const char* description); + + __host__ __device__ uint64_t compute_bug_signature(BugType type, uint32_t pc, + const evm_word_t& key_value); + + __host__ __device__ BugSeverity determine_severity(BugType type, const bug_context_t& context); + +private: + // Reentrancy detection helpers + __host__ __device__ bool is_reentrancy_safe_call(uint8_t opcode, const evm_word_t& target); + __host__ __device__ bool is_reentrancy_guard_pattern(const 
evm_word_t& slot, + const evm_word_t& old_value, + const evm_word_t& new_value); + + // Arithmetic overflow detection helpers + __host__ __device__ bool check_add_overflow(const evm_word_t& a, const evm_word_t& b); + __host__ __device__ bool check_mul_overflow(const evm_word_t& a, const evm_word_t& b); + __host__ __device__ bool check_sub_underflow(const evm_word_t& a, const evm_word_t& b); +}; + +// ============================================================================ +// Specialized Oracles +// ============================================================================ + +/** + * Integer overflow/underflow detector + */ +class ArithmeticOracle : public OracleDetector { +public: + __host__ __device__ ArithmeticOracle(oracle_config_t* config, bug_storage_t* storage); + + // Safe math verification + __host__ __device__ void verify_safe_add(uint32_t pc, const evm_word_t& a, const evm_word_t& b, + const evm_word_t& result); + __host__ __device__ void verify_safe_sub(uint32_t pc, const evm_word_t& a, const evm_word_t& b, + const evm_word_t& result); + __host__ __device__ void verify_safe_mul(uint32_t pc, const evm_word_t& a, const evm_word_t& b, + const evm_word_t& result); +}; + +/** + * Reentrancy vulnerability detector + */ +class ReentrancyOracle : public OracleDetector { +public: + __host__ __device__ ReentrancyOracle(oracle_config_t* config, bug_storage_t* storage); + + __host__ __device__ void track_external_call(uint32_t pc, const evm_word_t& target, + execution_state_tracker_t* tracker); + __host__ __device__ void track_state_modification(uint32_t pc, const evm_word_t& slot, + execution_state_tracker_t* tracker); + __host__ __device__ void check_reentrancy_pattern(execution_state_tracker_t* tracker); + +private: + // Known reentrancy guard patterns + bool has_reentrancy_guard_; + evm_word_t guard_slot_; +}; + +/** + * Access control vulnerability detector + */ +class AccessControlOracle : public OracleDetector { +public: + __host__ __device__ 
AccessControlOracle(oracle_config_t* config, bug_storage_t* storage); + + // Track privileged operations + __host__ __device__ void on_privileged_operation(uint32_t pc, uint8_t opcode, + const evm_word_t& sender); + + // Track authorization checks + __host__ __device__ void on_authorization_check(uint32_t pc, const evm_word_t& checked_address); + + // Verify access control + __host__ __device__ void verify_access_control(uint32_t pc, uint8_t operation); + +private: + bool authorization_checked_; + evm_word_t authorized_addresses_[16]; + uint32_t num_authorized_; +}; + +/** + * ERC20/Token vulnerability detector + */ +class TokenOracle : public OracleDetector { +public: + __host__ __device__ TokenOracle(oracle_config_t* config, bug_storage_t* storage); + + // ERC20 specific checks + __host__ __device__ void check_transfer(uint32_t pc, const evm_word_t& from, + const evm_word_t& to, const evm_word_t& amount); + __host__ __device__ void check_approve(uint32_t pc, const evm_word_t& owner, + const evm_word_t& spender, const evm_word_t& amount); + __host__ __device__ void check_transferFrom(uint32_t pc, const evm_word_t& from, + const evm_word_t& to, const evm_word_t& amount, + const evm_word_t& allowance); + + // Balance consistency + __host__ __device__ void track_balance_change(const evm_word_t& address, + const evm_word_t& old_balance, + const evm_word_t& new_balance); + __host__ __device__ void check_total_supply_consistency(); + +private: + evm_word_t tracked_total_supply_; + uint32_t total_supply_slot_; +}; + +/** + * Fund safety oracle (Ether leak detection) + */ +class FundSafetyOracle : public OracleDetector { +public: + __host__ __device__ FundSafetyOracle(oracle_config_t* config, bug_storage_t* storage); + + // Track ETH flow + __host__ __device__ void on_eth_received(const evm_word_t& from, const evm_word_t& amount); + __host__ __device__ void on_eth_sent(uint32_t pc, const evm_word_t& to, const evm_word_t& amount); + + // Check for stuck ETH + __host__ 
__device__ void check_stuck_ether(const evm_word_t& contract_balance); + + // Check for unexpected ETH + __host__ __device__ void check_unexpected_eth(const evm_word_t& expected, const evm_word_t& actual); + + // Selfdestruct checks + __host__ __device__ void check_selfdestruct_safety(uint32_t pc, const evm_word_t& beneficiary); + +private: + evm_word_t total_eth_received_; + evm_word_t total_eth_sent_; + bool has_withdrawal_function_; +}; + +/** + * Gas-related vulnerability detector + */ +class GasOracle : public OracleDetector { +public: + __host__ __device__ GasOracle(oracle_config_t* config, bug_storage_t* storage); + + // Track gas usage + __host__ __device__ void on_gas_usage(uint32_t pc, uint64_t gas_used, uint64_t gas_remaining); + + // Detect potential DoS + __host__ __device__ void check_unbounded_loop(uint32_t pc, uint32_t iteration_count); + __host__ __device__ void check_block_gas_limit(uint64_t total_gas); + + // External call gas checks + __host__ __device__ void check_call_gas(uint32_t pc, uint64_t gas_forwarded); + +private: + uint64_t max_gas_observed_; + uint32_t loop_iteration_counts_[64]; + uint32_t loop_pcs_[64]; + uint32_t num_loops_; +}; + +// ============================================================================ +// Composite Oracle (combines all detectors) +// ============================================================================ + +class CompositeOracle { +public: + __host__ __device__ CompositeOracle(oracle_config_t* config, bug_storage_t* storage); + + // Initialize all sub-oracles + __host__ __device__ void init(); + + // Forward hooks to all active oracles + __host__ __device__ void on_transaction_start(const evm_word_t& sender, const evm_word_t& receiver, + const evm_word_t& value, const uint8_t* calldata, + uint32_t calldata_len); + __host__ __device__ void on_instruction(uint32_t pc, uint8_t opcode, + const evm_word_t* stack, uint32_t stack_size, + execution_state_tracker_t* tracker); + __host__ __device__ void 
on_transaction_end(bool success, const uint8_t* return_data, + uint32_t return_size, uint64_t gas_used, + execution_state_tracker_t* tracker); + + // Get combined results + __host__ __device__ bug_storage_t* get_bugs() { return storage_; } + +private: + oracle_config_t* config_; + bug_storage_t* storage_; + + ArithmeticOracle arithmetic_; + ReentrancyOracle reentrancy_; + AccessControlOracle access_control_; + TokenOracle token_; + FundSafetyOracle fund_safety_; + GasOracle gas_; +}; + +// ============================================================================ +// CUDA Kernels for Batch Oracle Checking +// ============================================================================ + +__global__ void kernel_check_arithmetic( + uint8_t opcode, + const evm_word_t* operands_a, + const evm_word_t* operands_b, + const evm_word_t* results, + uint32_t* pcs, + uint32_t num_operations, + bug_storage_t* bug_storage, + oracle_config_t* config +); + +__global__ void kernel_check_reentrancy( + execution_state_tracker_t* trackers, + uint32_t num_instances, + bug_storage_t* bug_storage, + oracle_config_t* config +); + +__global__ void kernel_check_invariants( + const evm_word_t* pre_state, + const evm_word_t* post_state, + const uint32_t* invariant_types, + uint32_t num_invariants, + bug_storage_t* bug_storage +); + +// ============================================================================ +// Host Helper Functions +// ============================================================================ + +__host__ oracle_config_t* allocate_oracle_config(); +__host__ bug_storage_t* allocate_bug_storage(); +__host__ execution_state_tracker_t* allocate_trackers(uint32_t num_instances); +__host__ void free_oracle_config(oracle_config_t* config); +__host__ void free_bug_storage(bug_storage_t* storage); +__host__ void free_trackers(execution_state_tracker_t* trackers); + +__host__ void copy_bugs_to_host(detected_bug_t* host_bugs, const bug_storage_t* device_storage); +__host__ void 
print_bug_report(const bug_storage_t* storage); +__host__ void export_bugs_json(const bug_storage_t* storage, const char* filename); + +} // namespace fuzzing +} // namespace CuEVM + +#endif // _CUEVM_FUZZING_ORACLE_H_ diff --git a/CuEVM/src/fuzzing/corpus.cu b/CuEVM/src/fuzzing/corpus.cu new file mode 100644 index 0000000..16b40aa --- /dev/null +++ b/CuEVM/src/fuzzing/corpus.cu @@ -0,0 +1,1682 @@ +// CuEVM: CUDA Ethereum Virtual Machine implementation +// GPU Corpus Management Implementation for Smart Contract Fuzzing +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace CuEVM { +namespace fuzzing { + +// ============================================================================ +// Helper Functions +// ============================================================================ + +__host__ __device__ static uint64_t get_timestamp() { +#ifdef __CUDA_ARCH__ + return clock64(); +#else + return static_cast(time(nullptr)); +#endif +} + +__host__ __device__ static uint32_t hash_combine(uint32_t seed, uint32_t value) { + return seed ^ (value + 0x9e3779b9 + (seed << 6) + (seed >> 2)); +} + +__host__ __device__ static uint32_t fnv1a_hash(const uint8_t* data, uint32_t len) { + uint32_t hash = 2166136261u; + for (uint32_t i = 0; i < len; i++) { + hash ^= data[i]; + hash *= 16777619u; + } + return hash; +} + +// ============================================================================ +// seed_entry_t Implementation +// ============================================================================ + +__host__ __device__ void seed_entry_t::init() { + data.data = nullptr; + data.length = 0; + data.capacity = 0; + + metadata.id = 0; + metadata.parent_id = 0; + metadata.timestamp = 0; + metadata.generation = 0; + metadata.unique_edges = 0; + metadata.unique_branches = 0; + metadata.coverage_hash = 0; + metadata.coverage_contribution = 0.0f; + metadata.execution_count = 0; + metadata.mutation_count = 0; 
+ metadata.child_count = 0; + metadata.bug_count = 0; + metadata.energy = ENERGY_BASE; + metadata.priority = 0; + metadata.last_selected = 0; + metadata.minimized = false; + metadata.original_length = 0; + + num_transactions = 0; + for (uint32_t i = 0; i < MAX_SEQUENCE_LENGTH; i++) { + tx_offsets[i] = 0; + tx_lengths[i] = 0; + memset(&senders[i], 0, sizeof(evm_word_t)); + memset(&values[i], 0, sizeof(evm_word_t)); + memset(&receivers[i], 0, sizeof(evm_word_t)); + } + memset(&block_number, 0, sizeof(evm_word_t)); + memset(×tamp, 0, sizeof(evm_word_t)); +} + +__host__ __device__ void seed_entry_t::copy_from(const seed_entry_t& other) { + // Copy metadata + metadata = other.metadata; + num_transactions = other.num_transactions; + + // Copy transaction info + for (uint32_t i = 0; i < MAX_SEQUENCE_LENGTH; i++) { + tx_offsets[i] = other.tx_offsets[i]; + tx_lengths[i] = other.tx_lengths[i]; + senders[i] = other.senders[i]; + values[i] = other.values[i]; + receivers[i] = other.receivers[i]; + } + block_number = other.block_number; + timestamp = other.timestamp; + + // Deep copy data if allocated + if (other.data.data && other.data.length > 0) { + if (!data.data || data.capacity < other.data.length) { + // Need to allocate - this is tricky in device code + // Assume pre-allocated for device usage +#ifndef __CUDA_ARCH__ + if (data.data) { + delete[] data.data; + } + data.data = new uint8_t[other.data.length]; + data.capacity = other.data.length; +#endif + } + if (data.data) { + memcpy(data.data, other.data.data, other.data.length); + data.length = other.data.length; + } + } +} + +__host__ __device__ void seed_entry_t::set_transaction(uint32_t tx_idx, const uint8_t* calldata, + uint32_t len, const evm_word_t& sender, + const evm_word_t& value) { + if (tx_idx >= MAX_SEQUENCE_LENGTH) return; + + // Calculate offset + uint32_t offset = 0; + if (tx_idx > 0) { + offset = tx_offsets[tx_idx - 1] + tx_lengths[tx_idx - 1]; + } + + // Check capacity + if (offset + len > data.capacity) 
{ +#ifndef __CUDA_ARCH__ + // Grow buffer + uint32_t new_capacity = (offset + len) * 2; + if (new_capacity > MAX_SEED_DATA_SIZE) new_capacity = MAX_SEED_DATA_SIZE; + uint8_t* new_data = new uint8_t[new_capacity]; + if (data.data && data.length > 0) { + memcpy(new_data, data.data, data.length); + delete[] data.data; + } + data.data = new_data; + data.capacity = new_capacity; +#else + return; // Can't grow in device code +#endif + } + + // Copy transaction data + if (data.data && calldata) { + memcpy(data.data + offset, calldata, len); + } + + tx_offsets[tx_idx] = offset; + tx_lengths[tx_idx] = len; + senders[tx_idx] = sender; + values[tx_idx] = value; + + if (tx_idx >= num_transactions) { + num_transactions = tx_idx + 1; + } + data.length = offset + len; +} + +// ============================================================================ +// corpus_stats_t Implementation +// ============================================================================ + +__host__ __device__ void corpus_stats_t::init() { + total_seeds_added = 0; + total_seeds_removed = 0; + total_executions = 0; + total_mutations = 0; + total_new_coverage = 0; + total_bugs_found = 0; + current_size = 0; + unique_coverage_edges = 0; + unique_coverage_branches = 0; + overall_coverage_percent = 0.0f; + last_new_coverage_time = 0; + last_bug_time = 0; + cycles_since_progress = 0; + initial_seeds = 0; + mutant_seeds = 0; + splice_seeds = 0; + minimized_seeds = 0; +} + +__host__ __device__ void corpus_stats_t::update_coverage(uint32_t new_edges, uint32_t new_branches) { + unique_coverage_edges += new_edges; + unique_coverage_branches += new_branches; + if (new_edges > 0 || new_branches > 0) { + total_new_coverage++; + last_new_coverage_time = get_timestamp(); + cycles_since_progress = 0; + } else { + cycles_since_progress++; + } +} + +__host__ __device__ void corpus_stats_t::record_new_seed(bool from_mutation, bool caused_new_coverage) { + total_seeds_added++; + current_size++; + if (from_mutation) { + 
mutant_seeds++; + } else { + initial_seeds++; + } + if (caused_new_coverage) { + total_new_coverage++; + } +} + +// ============================================================================ +// corpus_hash_table_t Implementation +// ============================================================================ + +__host__ __device__ void corpus_hash_table_t::init() { + for (uint32_t i = 0; i < CORPUS_BUCKET_COUNT; i++) { + buckets[i].count = 0; + for (uint32_t j = 0; j < 16; j++) { + buckets[i].seed_indices[j] = UINT32_MAX; + } + } +} + +__host__ __device__ bool corpus_hash_table_t::contains(uint32_t coverage_hash) { + uint32_t bucket_idx = coverage_hash % CORPUS_BUCKET_COUNT; + const corpus_bucket_t& bucket = buckets[bucket_idx]; + + for (uint32_t i = 0; i < bucket.count && i < 16; i++) { + if (bucket.seed_indices[i] != UINT32_MAX) { + // In a full implementation, we'd compare the actual coverage + // Here we just check if the hash exists + return true; + } + } + return false; +} + +__host__ __device__ void corpus_hash_table_t::insert(uint32_t coverage_hash, uint32_t seed_idx) { + uint32_t bucket_idx = coverage_hash % CORPUS_BUCKET_COUNT; + corpus_bucket_t& bucket = buckets[bucket_idx]; + + if (bucket.count < 16) { + bucket.seed_indices[bucket.count] = seed_idx; + bucket.count++; + } +} + +__host__ __device__ void corpus_hash_table_t::remove(uint32_t coverage_hash, uint32_t seed_idx) { + uint32_t bucket_idx = coverage_hash % CORPUS_BUCKET_COUNT; + corpus_bucket_t& bucket = buckets[bucket_idx]; + + for (uint32_t i = 0; i < bucket.count && i < 16; i++) { + if (bucket.seed_indices[i] == seed_idx) { + // Shift remaining entries + for (uint32_t j = i; j < bucket.count - 1 && j < 15; j++) { + bucket.seed_indices[j] = bucket.seed_indices[j + 1]; + } + bucket.count--; + bucket.seed_indices[bucket.count] = UINT32_MAX; + return; + } + } +} + +// ============================================================================ +// invariant_t Implementation +// 
============================================================================ + +__host__ __device__ void invariant_t::init() { + type = InvariantType::STORAGE_EQUALS; + id = 0; + memset(&target_address, 0, sizeof(evm_word_t)); + memset(&slot1, 0, sizeof(evm_word_t)); + memset(&slot2, 0, sizeof(evm_word_t)); + memset(&expected_value, 0, sizeof(evm_word_t)); + memset(&min_value, 0, sizeof(evm_word_t)); + memset(&max_value, 0, sizeof(evm_word_t)); + for (uint32_t i = 0; i < 4; i++) { + memset(&addresses[i], 0, sizeof(evm_word_t)); + memset(&slots[i], 0, sizeof(evm_word_t)); + } + num_slots = 0; + memset(description, 0, sizeof(description)); + enabled = true; + violation_count = 0; +} + +// ============================================================================ +// GPUCorpusManager Implementation +// ============================================================================ + +__host__ GPUCorpusManager::GPUCorpusManager(uint32_t max_size) { + capacity_ = max_size; + coverage_baseline_ = nullptr; + queue_size_ = 0; + + // Allocate seed storage + cudaMallocManaged(&seeds_, sizeof(seed_entry_t) * max_size); + cudaMallocManaged(&free_indices_, sizeof(uint32_t) * max_size); + cudaMallocManaged(&priority_queue_, sizeof(uint32_t) * max_size); + + // Initialize seeds + for (uint32_t i = 0; i < max_size; i++) { + seeds_[i].init(); + free_indices_[i] = max_size - 1 - i; // Stack-based free list + } + free_count_ = max_size; + + stats_.init(); + hash_table_.init(); +} + +__host__ GPUCorpusManager::~GPUCorpusManager() { + // Free seed data + for (uint32_t i = 0; i < capacity_; i++) { + if (seeds_[i].data.data) { + cudaFree(seeds_[i].data.data); + } + } + cudaFree(seeds_); + cudaFree(free_indices_); + cudaFree(priority_queue_); +} + +__host__ __device__ uint32_t GPUCorpusManager::allocate_slot() { + if (free_count_ == 0) return UINT32_MAX; + free_count_--; + return free_indices_[free_count_]; +} + +__host__ __device__ void GPUCorpusManager::deallocate_slot(uint32_t idx) { + 
if (idx >= capacity_) return; + free_indices_[free_count_] = idx; + free_count_++; +} + +__host__ __device__ bool GPUCorpusManager::add_seed(const seed_entry_t& seed, bool check_duplicate) { + // Check for duplicates + if (check_duplicate && hash_table_.contains(seed.metadata.coverage_hash)) { + return false; + } + + // Allocate slot + uint32_t idx = allocate_slot(); + if (idx == UINT32_MAX) { + return false; + } + + // Copy seed + seeds_[idx].copy_from(seed); + seeds_[idx].metadata.id = stats_.total_seeds_added + 1; + seeds_[idx].metadata.timestamp = get_timestamp(); + + // Update hash table + hash_table_.insert(seed.metadata.coverage_hash, idx); + + // Add to priority queue + if (queue_size_ < capacity_) { + priority_queue_[queue_size_] = idx; + queue_size_++; + } + + stats_.record_new_seed(seed.metadata.parent_id != 0, false); + + return true; +} + +__host__ __device__ bool GPUCorpusManager::add_seed_if_interesting(const seed_entry_t& seed, + const coverage_snapshot_t& coverage, + const bug_storage_t* bugs) { + // Check if this seed adds new coverage + uint32_t new_edges = 0; + uint32_t new_branches = 0; + + // Compare with baseline if available + if (coverage_baseline_) { + // Count new coverage + for (uint32_t i = 0; i < COVERAGE_MAP_SIZE / 32; i++) { + uint32_t new_bits = coverage.edge_bitmap[i] & ~coverage_baseline_->edge_bitmap[i]; + new_edges += __builtin_popcount(new_bits); + } + } else { + // No baseline, count all coverage + for (uint32_t i = 0; i < COVERAGE_MAP_SIZE / 32; i++) { + new_edges += __builtin_popcount(coverage.edge_bitmap[i]); + } + } + + // Check if found new bug + bool found_new_bug = false; + if (bugs && bugs->num_bugs > 0) { + found_new_bug = true; // Simplified check + } + + // Add if interesting + if (new_edges > 0 || new_branches > 0 || found_new_bug) { + seed_entry_t modified_seed = seed; + modified_seed.metadata.unique_edges = new_edges; + modified_seed.metadata.unique_branches = new_branches; + 
modified_seed.metadata.coverage_contribution = static_cast(new_edges + new_branches); + + if (found_new_bug) { + modified_seed.metadata.energy += ENERGY_NEW_BUG; + modified_seed.metadata.bug_count++; + } else if (new_edges > 0 || new_branches > 0) { + modified_seed.metadata.energy += ENERGY_NEW_COVERAGE; + } + + bool added = add_seed(modified_seed, true); + if (added) { + stats_.update_coverage(new_edges, new_branches); + } + return added; + } + + return false; +} + +__host__ __device__ void GPUCorpusManager::remove_seed(uint32_t idx) { + if (idx >= capacity_) return; + + // Remove from hash table + hash_table_.remove(seeds_[idx].metadata.coverage_hash, idx); + + // Clear seed + seeds_[idx].init(); + + // Return slot to free list + deallocate_slot(idx); + + stats_.total_seeds_removed++; + stats_.current_size--; +} + +__host__ __device__ seed_entry_t* GPUCorpusManager::get_seed(uint32_t idx) { + if (idx >= capacity_) return nullptr; + return &seeds_[idx]; +} + +__host__ __device__ seed_entry_t* GPUCorpusManager::select_seed(curandState* rng) { + if (stats_.current_size == 0) return nullptr; + + // Random selection from priority queue + uint32_t rand_idx; +#ifdef __CUDA_ARCH__ + rand_idx = curand(rng) % queue_size_; +#else + rand_idx = rand() % queue_size_; +#endif + + uint32_t seed_idx = priority_queue_[rand_idx]; + seed_entry_t* seed = &seeds_[seed_idx]; + seed->metadata.execution_count++; + seed->metadata.last_selected = get_timestamp(); + + return seed; +} + +__host__ __device__ seed_entry_t* GPUCorpusManager::select_weighted(curandState* rng) { + if (stats_.current_size == 0) return nullptr; + + // Calculate total energy + uint64_t total_energy = 0; + for (uint32_t i = 0; i < queue_size_; i++) { + total_energy += seeds_[priority_queue_[i]].metadata.energy; + } + + if (total_energy == 0) { + return select_seed(rng); // Fallback to uniform selection + } + + // Weighted random selection + uint64_t target; +#ifdef __CUDA_ARCH__ + target = curand(rng) % total_energy; 
+#else + target = rand() % total_energy; +#endif + + uint64_t cumulative = 0; + for (uint32_t i = 0; i < queue_size_; i++) { + cumulative += seeds_[priority_queue_[i]].metadata.energy; + if (cumulative > target) { + uint32_t seed_idx = priority_queue_[i]; + seed_entry_t* seed = &seeds_[seed_idx]; + seed->metadata.execution_count++; + seed->metadata.last_selected = get_timestamp(); + return seed; + } + } + + return &seeds_[priority_queue_[queue_size_ - 1]]; +} + +__host__ __device__ void GPUCorpusManager::update_seed_after_execution(uint32_t idx, bool caused_new_coverage, + bool found_bug) { + if (idx >= capacity_) return; + + seed_entry_t* seed = &seeds_[idx]; + seed->metadata.execution_count++; + + if (caused_new_coverage) { + seed->metadata.energy += ENERGY_NEW_COVERAGE; + seed->metadata.child_count++; + } + + if (found_bug) { + seed->metadata.energy += ENERGY_NEW_BUG; + seed->metadata.bug_count++; + stats_.total_bugs_found++; + } + + stats_.total_executions++; +} + +__host__ __device__ uint32_t GPUCorpusManager::compute_coverage_hash(const coverage_snapshot_t& coverage) { + uint32_t hash = 0; + for (uint32_t i = 0; i < COVERAGE_MAP_SIZE / 32; i++) { + hash = hash_combine(hash, coverage.edge_bitmap[i]); + } + return hash; +} + +__host__ __device__ uint32_t GPUCorpusManager::compute_seed_hash(const seed_entry_t& seed) { + if (!seed.data.data || seed.data.length == 0) { + return 0; + } + return fnv1a_hash(seed.data.data, seed.data.length); +} + +__host__ __device__ float GPUCorpusManager::compute_priority(const seed_metadata_t& metadata) { + float priority = 1.0f; + + // Favor seeds with high coverage contribution + priority += metadata.coverage_contribution * 10.0f; + + // Favor bug-finding seeds + priority += metadata.bug_count * 100.0f; + + // Penalize over-mutated seeds + if (metadata.mutation_count > 1000) { + priority *= 0.5f; + } + + // Favor newer seeds + if (metadata.generation < 10) { + priority *= 1.5f; + } + + return priority; +} + +__host__ void 
GPUCorpusManager::cull_corpus() { + if (stats_.current_size <= MIN_CORPUS_ENTRIES) { + return; + } + + // Remove seeds with low priority + uint32_t target_size = stats_.current_size * 3 / 4; // Keep 75% + if (target_size < MIN_CORPUS_ENTRIES) { + target_size = MIN_CORPUS_ENTRIES; + } + + // Sort by priority (ascending, so worst first) + std::vector> priorities; + for (uint32_t i = 0; i < queue_size_; i++) { + uint32_t idx = priority_queue_[i]; + float pri = compute_priority(seeds_[idx].metadata); + priorities.push_back({pri, idx}); + } + + std::sort(priorities.begin(), priorities.end()); + + // Remove lowest priority seeds + uint32_t to_remove = stats_.current_size - target_size; + for (uint32_t i = 0; i < to_remove && i < priorities.size(); i++) { + remove_seed(priorities[i].second); + } + + compact_corpus(); +} + +__host__ void GPUCorpusManager::compact_corpus() { + // Rebuild priority queue with only valid entries + uint32_t new_size = 0; + for (uint32_t i = 0; i < queue_size_; i++) { + uint32_t idx = priority_queue_[i]; + if (seeds_[idx].metadata.id != 0) { + priority_queue_[new_size] = idx; + new_size++; + } + } + queue_size_ = new_size; +} + +__host__ void GPUCorpusManager::sort_by_priority() { + std::vector> priorities; + for (uint32_t i = 0; i < queue_size_; i++) { + uint32_t idx = priority_queue_[i]; + float pri = compute_priority(seeds_[idx].metadata); + priorities.push_back({pri, idx}); + } + + std::sort(priorities.begin(), priorities.end(), + [](const auto& a, const auto& b) { return a.first > b.first; }); + + for (uint32_t i = 0; i < queue_size_; i++) { + priority_queue_[i] = priorities[i].second; + } +} + +__host__ void GPUCorpusManager::recalculate_energies() { + for (uint32_t i = 0; i < queue_size_; i++) { + uint32_t idx = priority_queue_[i]; + seed_entry_t& seed = seeds_[idx]; + + // Decay energy over time + seed.metadata.energy = seed.metadata.energy / ENERGY_DECAY_FACTOR; + if (seed.metadata.energy < ENERGY_MIN) { + seed.metadata.energy = 
ENERGY_MIN; + } + + // Recalculate priority + seed.metadata.priority = static_cast(compute_priority(seed.metadata)); + } +} + +__host__ void GPUCorpusManager::minimize_seed(uint32_t idx) { + if (idx >= capacity_) return; + + seed_entry_t* seed = &seeds_[idx]; + if (seed->metadata.minimized) return; + + // Simple minimization: try removing chunks + SeedMinimizer minimizer; + + // For now, just mark as minimized + // Full implementation would use delta debugging + seed->metadata.minimized = true; + seed->metadata.original_length = seed->data.length; +} + +__host__ void GPUCorpusManager::minimize_all() { + for (uint32_t i = 0; i < queue_size_; i++) { + minimize_seed(priority_queue_[i]); + } + stats_.minimized_seeds = queue_size_; +} + +__host__ void GPUCorpusManager::merge_from(const GPUCorpusManager& other) { + for (uint32_t i = 0; i < other.queue_size_; i++) { + uint32_t idx = other.priority_queue_[i]; + const seed_entry_t& seed = other.seeds_[idx]; + add_seed(seed, true); + } +} + +__host__ void GPUCorpusManager::import_seeds(const char* directory) { + DIR* dir = opendir(directory); + if (!dir) return; + + struct dirent* entry; + while ((entry = readdir(dir)) != nullptr) { + if (entry->d_name[0] == '.') continue; + + char filepath[512]; + snprintf(filepath, sizeof(filepath), "%s/%s", directory, entry->d_name); + + // Read seed file + FILE* f = fopen(filepath, "rb"); + if (!f) continue; + + fseek(f, 0, SEEK_END); + long size = ftell(f); + fseek(f, 0, SEEK_SET); + + if (size > 0 && size <= MAX_SEED_DATA_SIZE) { + seed_entry_t seed; + seed.init(); + + uint8_t* data; + cudaMallocManaged(&data, size); + fread(data, 1, size, f); + + seed.data.data = data; + seed.data.length = static_cast(size); + seed.data.capacity = static_cast(size); + seed.num_transactions = 1; + seed.tx_offsets[0] = 0; + seed.tx_lengths[0] = static_cast(size); + + add_seed(seed, false); + } + + fclose(f); + } + + closedir(dir); +} + +__host__ void GPUCorpusManager::export_seeds(const char* directory) 
{ + mkdir(directory, 0755); + + for (uint32_t i = 0; i < queue_size_; i++) { + uint32_t idx = priority_queue_[i]; + const seed_entry_t& seed = seeds_[idx]; + + if (!seed.data.data || seed.data.length == 0) continue; + + char filepath[512]; + snprintf(filepath, sizeof(filepath), "%s/seed_%lu.bin", + directory, seed.metadata.id); + + FILE* f = fopen(filepath, "wb"); + if (f) { + fwrite(seed.data.data, 1, seed.data.length, f); + fclose(f); + } + } +} + +__host__ void GPUCorpusManager::export_interesting_seeds(const char* directory, uint32_t max_seeds) { + mkdir(directory, 0755); + + // Sort by priority + sort_by_priority(); + + uint32_t exported = 0; + for (uint32_t i = 0; i < queue_size_ && exported < max_seeds; i++) { + uint32_t idx = priority_queue_[i]; + const seed_entry_t& seed = seeds_[idx]; + + if (!seed.data.data || seed.data.length == 0) continue; + + char filepath[512]; + snprintf(filepath, sizeof(filepath), "%s/interesting_%u_id%lu.bin", + directory, exported, seed.metadata.id); + + FILE* f = fopen(filepath, "wb"); + if (f) { + fwrite(seed.data.data, 1, seed.data.length, f); + fclose(f); + exported++; + } + } +} + +__host__ void GPUCorpusManager::save_checkpoint(const char* filename) { + FILE* f = fopen(filename, "wb"); + if (!f) return; + + // Write stats + fwrite(&stats_, sizeof(corpus_stats_t), 1, f); + + // Write number of seeds + fwrite(&queue_size_, sizeof(uint32_t), 1, f); + + // Write each seed + for (uint32_t i = 0; i < queue_size_; i++) { + uint32_t idx = priority_queue_[i]; + const seed_entry_t& seed = seeds_[idx]; + + // Write metadata + fwrite(&seed.metadata, sizeof(seed_metadata_t), 1, f); + fwrite(&seed.num_transactions, sizeof(uint32_t), 1, f); + fwrite(seed.tx_offsets, sizeof(uint32_t), MAX_SEQUENCE_LENGTH, f); + fwrite(seed.tx_lengths, sizeof(uint32_t), MAX_SEQUENCE_LENGTH, f); + fwrite(seed.senders, sizeof(evm_word_t), MAX_SEQUENCE_LENGTH, f); + fwrite(seed.values, sizeof(evm_word_t), MAX_SEQUENCE_LENGTH, f); + + // Write data + 
fwrite(&seed.data.length, sizeof(uint32_t), 1, f); + if (seed.data.length > 0 && seed.data.data) { + fwrite(seed.data.data, 1, seed.data.length, f); + } + } + + fclose(f); +} + +__host__ void GPUCorpusManager::load_checkpoint(const char* filename) { + FILE* f = fopen(filename, "rb"); + if (!f) return; + + // Read stats + fread(&stats_, sizeof(corpus_stats_t), 1, f); + + // Read number of seeds + uint32_t num_seeds; + fread(&num_seeds, sizeof(uint32_t), 1, f); + + // Read each seed + for (uint32_t i = 0; i < num_seeds; i++) { + seed_entry_t seed; + seed.init(); + + // Read metadata + fread(&seed.metadata, sizeof(seed_metadata_t), 1, f); + fread(&seed.num_transactions, sizeof(uint32_t), 1, f); + fread(seed.tx_offsets, sizeof(uint32_t), MAX_SEQUENCE_LENGTH, f); + fread(seed.tx_lengths, sizeof(uint32_t), MAX_SEQUENCE_LENGTH, f); + fread(seed.senders, sizeof(evm_word_t), MAX_SEQUENCE_LENGTH, f); + fread(seed.values, sizeof(evm_word_t), MAX_SEQUENCE_LENGTH, f); + + // Read data + uint32_t data_len; + fread(&data_len, sizeof(uint32_t), 1, f); + if (data_len > 0) { + cudaMallocManaged(&seed.data.data, data_len); + fread(seed.data.data, 1, data_len, f); + seed.data.length = data_len; + seed.data.capacity = data_len; + } + + add_seed(seed, false); + } + + fclose(f); +} + +__host__ void GPUCorpusManager::set_coverage_baseline(const gpu_coverage_map_t* baseline) { + coverage_baseline_ = const_cast(baseline); +} + +__host__ void GPUCorpusManager::update_coverage_contribution(uint32_t seed_idx, + const coverage_snapshot_t& new_coverage) { + if (seed_idx >= capacity_) return; + + seed_entry_t* seed = &seeds_[seed_idx]; + + // Calculate contribution + uint32_t contribution = 0; + for (uint32_t i = 0; i < COVERAGE_MAP_SIZE / 32; i++) { + contribution += __builtin_popcount(new_coverage.edge_bitmap[i]); + } + + seed->metadata.coverage_contribution = static_cast(contribution); + seed->metadata.coverage_hash = compute_coverage_hash(new_coverage); +} + +__host__ void 
GPUCorpusManager::print_stats() { + printf("=== Corpus Statistics ===\n"); + printf("Current size: %u / %u\n", stats_.current_size, capacity_); + printf("Total seeds added: %lu\n", stats_.total_seeds_added); + printf("Total seeds removed: %lu\n", stats_.total_seeds_removed); + printf("Total executions: %lu\n", stats_.total_executions); + printf("Total mutations: %lu\n", stats_.total_mutations); + printf("Unique coverage edges: %u\n", stats_.unique_coverage_edges); + printf("Unique coverage branches: %u\n", stats_.unique_coverage_branches); + printf("Coverage: %.2f%%\n", stats_.overall_coverage_percent); + printf("Bugs found: %lu\n", stats_.total_bugs_found); + printf("Initial seeds: %u\n", stats_.initial_seeds); + printf("Mutant seeds: %u\n", stats_.mutant_seeds); + printf("Minimized seeds: %u\n", stats_.minimized_seeds); + printf("Cycles since progress: %u\n", stats_.cycles_since_progress); + printf("=========================\n"); +} + +__host__ void GPUCorpusManager::export_stats_json(const char* filename) { + FILE* f = fopen(filename, "w"); + if (!f) return; + + fprintf(f, "{\n"); + fprintf(f, " \"current_size\": %u,\n", stats_.current_size); + fprintf(f, " \"capacity\": %u,\n", capacity_); + fprintf(f, " \"total_seeds_added\": %lu,\n", stats_.total_seeds_added); + fprintf(f, " \"total_seeds_removed\": %lu,\n", stats_.total_seeds_removed); + fprintf(f, " \"total_executions\": %lu,\n", stats_.total_executions); + fprintf(f, " \"total_mutations\": %lu,\n", stats_.total_mutations); + fprintf(f, " \"unique_coverage_edges\": %u,\n", stats_.unique_coverage_edges); + fprintf(f, " \"unique_coverage_branches\": %u,\n", stats_.unique_coverage_branches); + fprintf(f, " \"overall_coverage_percent\": %.4f,\n", stats_.overall_coverage_percent); + fprintf(f, " \"total_bugs_found\": %lu,\n", stats_.total_bugs_found); + fprintf(f, " \"initial_seeds\": %u,\n", stats_.initial_seeds); + fprintf(f, " \"mutant_seeds\": %u,\n", stats_.mutant_seeds); + fprintf(f, " \"splice_seeds\": 
%u,\n", stats_.splice_seeds); + fprintf(f, " \"minimized_seeds\": %u,\n", stats_.minimized_seeds); + fprintf(f, " \"cycles_since_progress\": %u\n", stats_.cycles_since_progress); + fprintf(f, "}\n"); + + fclose(f); +} + +// ============================================================================ +// SeedMinimizer Implementation +// ============================================================================ + +__host__ SeedMinimizer::SeedMinimizer() {} + +__host__ bool SeedMinimizer::minimize(seed_entry_t* seed, + bool (*test_fn)(const seed_entry_t*, void*), + void* test_ctx) { + if (!seed || !seed->data.data || seed->data.length < 2) { + return false; + } + + // Try sequence minimization first if it's a multi-tx seed + if (seed->num_transactions > 1) { + minimize_sequence(seed, test_fn, test_ctx); + } + + // Then minimize individual calldata + bool reduced = false; + for (uint32_t tx_idx = 0; tx_idx < seed->num_transactions; tx_idx++) { + uint8_t* tx_data = seed->data.data + seed->tx_offsets[tx_idx]; + uint32_t tx_len = seed->tx_lengths[tx_idx]; + + // Create wrapper test function for single transaction + auto single_tx_test = [&](const uint8_t* data, uint32_t len) -> bool { + // Temporarily modify seed + uint32_t orig_len = seed->tx_lengths[tx_idx]; + seed->tx_lengths[tx_idx] = len; + memcpy(tx_data, data, len); + + bool result = test_fn(seed, test_ctx); + + // Restore if test failed + if (!result) { + seed->tx_lengths[tx_idx] = orig_len; + } + return result; + }; + + // Delta debugging on this transaction + uint32_t new_len = tx_len; + if (ddmin(tx_data, &new_len, 4, nullptr, nullptr)) { + seed->tx_lengths[tx_idx] = new_len; + reduced = true; + } + } + + seed->metadata.minimized = true; + seed->metadata.original_length = seed->data.length; + + return reduced; +} + +__host__ bool SeedMinimizer::minimize_sequence(seed_entry_t* seed, + bool (*test_fn)(const seed_entry_t*, void*), + void* test_ctx) { + if (seed->num_transactions <= 1) { + return false; + } + + 
bool reduced = false; + + // Try removing transactions one at a time + for (uint32_t i = seed->num_transactions; i > 0; i--) { + uint32_t tx_to_remove = i - 1; + + // Create a copy without this transaction + seed_entry_t test_seed; + test_seed.init(); + + uint32_t new_idx = 0; + uint32_t new_offset = 0; + for (uint32_t j = 0; j < seed->num_transactions; j++) { + if (j == tx_to_remove) continue; + + // Copy transaction + test_seed.tx_offsets[new_idx] = new_offset; + test_seed.tx_lengths[new_idx] = seed->tx_lengths[j]; + test_seed.senders[new_idx] = seed->senders[j]; + test_seed.values[new_idx] = seed->values[j]; + + new_offset += seed->tx_lengths[j]; + new_idx++; + } + test_seed.num_transactions = new_idx; + + // Allocate and copy data + if (new_offset > 0) { + cudaMallocManaged(&test_seed.data.data, new_offset); + test_seed.data.length = new_offset; + test_seed.data.capacity = new_offset; + + uint32_t copy_offset = 0; + for (uint32_t j = 0; j < seed->num_transactions; j++) { + if (j == tx_to_remove) continue; + memcpy(test_seed.data.data + copy_offset, + seed->data.data + seed->tx_offsets[j], + seed->tx_lengths[j]); + copy_offset += seed->tx_lengths[j]; + } + } + + // Test if still interesting + if (test_fn(&test_seed, test_ctx)) { + // Reduction successful, update original seed + seed->copy_from(test_seed); + reduced = true; + i--; // Recheck current position + } + + // Free test seed data + if (test_seed.data.data) { + cudaFree(test_seed.data.data); + } + } + + return reduced; +} + +__host__ bool SeedMinimizer::minimize_calldata(uint8_t* data, uint32_t* length, + bool (*test_fn)(const uint8_t*, uint32_t, void*), + void* test_ctx) { + return ddmin(data, length, 4, test_fn, test_ctx); +} + +__host__ bool SeedMinimizer::ddmin(uint8_t* data, uint32_t* length, uint32_t granularity, + bool (*test_fn)(const uint8_t*, uint32_t, void*), + void* test_ctx) { + if (*length < granularity * 2) { + return false; + } + + bool reduced = false; + uint32_t n = granularity; + + 
while (n <= *length / 2) {
+        uint32_t chunk_size = *length / n;
+        bool chunk_removed = false;
+
+        for (uint32_t i = 0; i < n && !chunk_removed; i++) {
+            uint32_t start = i * chunk_size;
+            uint32_t end = (i == n - 1) ? *length : (i + 1) * chunk_size;
+            uint32_t remove_size = end - start;
+
+            // Create reduced data
+            uint32_t new_len = *length - remove_size;
+            uint8_t* new_data = new uint8_t[new_len];
+
+            memcpy(new_data, data, start);
+            memcpy(new_data + start, data + end, *length - end);
+
+            // Test if still triggers behavior
+            bool still_triggers = true;
+            if (test_fn) {
+                still_triggers = test_fn(new_data, new_len, test_ctx);
+            }
+
+            if (still_triggers) {
+                // Reduction successful
+                memcpy(data, new_data, new_len);
+                *length = new_len;
+                reduced = true;
+                chunk_removed = true;
+                n = granularity;  // Reset to try larger chunks again
+            }
+
+            delete[] new_data;
+        }
+
+        if (!chunk_removed) {
+            n *= 2;
+        }
+    }
+
+    return reduced;
+}
+
+// ============================================================================
+// CorpusDistiller Implementation
+// ============================================================================
+
+__host__ CorpusDistiller::CorpusDistiller(GPUCorpusManager* corpus)
+    : source_corpus_(corpus) {}
+
+__host__ void CorpusDistiller::distill(GPUCorpusManager* output_corpus,
+                                       const gpu_coverage_map_t* target_coverage) {
+    greedy_cover(output_corpus, target_coverage);
+}
+
+__host__ void CorpusDistiller::greedy_cover(GPUCorpusManager* output_corpus,
+                                            const gpu_coverage_map_t* target_coverage) {
+    if (!source_corpus_ || !output_corpus) return;
+
+    // Track which coverage bits we still need
+    std::vector<uint32_t> uncovered(COVERAGE_MAP_SIZE / 32);
+    for (uint32_t i = 0; i < COVERAGE_MAP_SIZE / 32; i++) {
+        uncovered[i] = target_coverage->edge_bitmap[i];
+    }
+
+    uint32_t total_uncovered = 0;
+    for (uint32_t i = 0; i < COVERAGE_MAP_SIZE / 32; i++) {
+        total_uncovered += __builtin_popcount(uncovered[i]);
+    }
+
+    // Greedy selection
+    corpus_stats_t* stats 
= source_corpus_->get_stats();
+    std::vector<bool> selected(stats->current_size, false);
+
+    while (total_uncovered > 0) {
+        uint32_t best_idx = UINT32_MAX;
+        uint32_t best_contribution = 0;
+
+        // Find seed that covers most uncovered bits
+        for (uint32_t i = 0; i < stats->current_size; i++) {
+            if (selected[i]) continue;
+
+            seed_entry_t* seed = source_corpus_->get_seed(i);
+            if (!seed) continue;
+
+            // Count how many uncovered bits this seed covers
+            uint32_t contribution = 0;
+            // In a real implementation, we'd need the seed's coverage bitmap
+            // For now, use the coverage hash as a proxy
+            contribution = seed->metadata.unique_edges;
+
+            if (contribution > best_contribution) {
+                best_contribution = contribution;
+                best_idx = i;
+            }
+        }
+
+        if (best_idx == UINT32_MAX) break;
+
+        // Add best seed to output
+        seed_entry_t* best_seed = source_corpus_->get_seed(best_idx);
+        output_corpus->add_seed(*best_seed, false);
+        selected[best_idx] = true;
+
+        // Update uncovered (simplified)
+        total_uncovered -= best_contribution;
+        if (total_uncovered > stats->unique_coverage_edges) {
+            total_uncovered = 0;  // Prevent underflow
+        }
+    }
+}
+
+// ============================================================================
+// InvariantChecker Implementation
+// ============================================================================
+
+__host__ __device__ InvariantChecker::InvariantChecker() {
+    num_invariants_ = 0;
+    for (uint32_t i = 0; i < MAX_INVARIANTS; i++) {
+        invariants_[i].init();
+    }
+}
+
+__host__ __device__ uint32_t InvariantChecker::add_invariant(const invariant_t& inv) {
+    if (num_invariants_ >= MAX_INVARIANTS) {
+        return UINT32_MAX;
+    }
+
+    uint32_t id = num_invariants_;
+    invariants_[num_invariants_] = inv;
+    invariants_[num_invariants_].id = id;
+    num_invariants_++;
+
+    return id;
+}
+
+__host__ __device__ void InvariantChecker::remove_invariant(uint32_t id) {
+    if (id >= num_invariants_) return;
+
+    // Shift remaining invariants
+    for (uint32_t i = id; i < 
num_invariants_ - 1; i++) { + invariants_[i] = invariants_[i + 1]; + invariants_[i].id = i; + } + num_invariants_--; +} + +__host__ __device__ void InvariantChecker::enable_invariant(uint32_t id, bool enabled) { + if (id < num_invariants_) { + invariants_[id].enabled = enabled; + } +} + +__host__ __device__ void InvariantChecker::check_all(const evm_word_t* storage, + const evm_word_t* balances, + uint32_t tx_index, + invariant_result_t* results, + uint32_t* num_violations) { + *num_violations = 0; + + for (uint32_t i = 0; i < num_invariants_; i++) { + if (!invariants_[i].enabled) continue; + + invariant_result_t result; + if (check_single(i, storage, balances, &result)) { + if (result.violated) { + result.tx_index = tx_index; + result.timestamp = get_timestamp(); + results[*num_violations] = result; + (*num_violations)++; + invariants_[i].violation_count++; + } + } + } +} + +__host__ __device__ bool InvariantChecker::check_single(uint32_t id, + const evm_word_t* storage, + const evm_word_t* balances, + invariant_result_t* result) { + if (id >= num_invariants_) return false; + + const invariant_t& inv = invariants_[id]; + result->invariant_id = id; + result->violated = false; + + switch (inv.type) { + case InvariantType::STORAGE_EQUALS: + result->violated = !check_storage_equals(inv, storage); + break; + + case InvariantType::STORAGE_NOT_ZERO: + case InvariantType::STORAGE_LESS_THAN: + case InvariantType::STORAGE_GREATER_THAN: + case InvariantType::STORAGE_IN_RANGE: + result->violated = !check_storage_range(inv, storage); + break; + + case InvariantType::BALANCE_CONSERVED: + result->violated = !check_balance_conserved(inv, balances); + break; + + case InvariantType::SUM_EQUALS: + case InvariantType::RATIO_MAINTAINED: + result->violated = !check_sum_equals(inv, storage); + break; + + default: + // Unknown invariant type + break; + } + + return true; +} + +__host__ __device__ bool InvariantChecker::check_storage_equals(const invariant_t& inv, + const evm_word_t* 
storage) { + if (!storage) return true; + + // Get slot index (simplified - in reality would need to compute storage location) + uint32_t slot_idx = inv.slot1._limbs[0] % 1024; // Assume max 1024 storage slots + + // Compare with expected value + for (int i = 0; i < 8; i++) { + if (storage[slot_idx]._limbs[i] != inv.expected_value._limbs[i]) { + return false; + } + } + return true; +} + +__host__ __device__ bool InvariantChecker::check_storage_range(const invariant_t& inv, + const evm_word_t* storage) { + if (!storage) return true; + + uint32_t slot_idx = inv.slot1._limbs[0] % 1024; + + // Simplified comparison using first limb only + uint32_t value = storage[slot_idx]._limbs[0]; + + switch (inv.type) { + case InvariantType::STORAGE_NOT_ZERO: + // Check if any limb is non-zero + for (int i = 0; i < 8; i++) { + if (storage[slot_idx]._limbs[i] != 0) return true; + } + return false; + + case InvariantType::STORAGE_LESS_THAN: + return value < inv.max_value._limbs[0]; + + case InvariantType::STORAGE_GREATER_THAN: + return value > inv.min_value._limbs[0]; + + case InvariantType::STORAGE_IN_RANGE: + return value >= inv.min_value._limbs[0] && value <= inv.max_value._limbs[0]; + + default: + return true; + } +} + +__host__ __device__ bool InvariantChecker::check_balance_conserved(const invariant_t& inv, + const evm_word_t* balances) { + if (!balances) return true; + + // Sum up balances for tracked addresses + uint64_t total = 0; + for (uint32_t i = 0; i < inv.num_slots && i < 4; i++) { + uint32_t addr_idx = inv.addresses[i]._limbs[0] % 256; + total += balances[addr_idx]._limbs[0]; + } + + // Check against expected total + return total == inv.expected_value._limbs[0]; +} + +__host__ __device__ bool InvariantChecker::check_sum_equals(const invariant_t& inv, + const evm_word_t* storage) { + if (!storage) return true; + + // Sum storage slots + uint64_t sum = 0; + for (uint32_t i = 0; i < inv.num_slots && i < 4; i++) { + uint32_t slot_idx = inv.slots[i]._limbs[0] % 1024; + sum 
+= storage[slot_idx]._limbs[0]; + } + + // Check against expected sum + return sum == inv.expected_value._limbs[0]; +} + +__host__ void InvariantChecker::add_erc20_invariants(const evm_word_t& token_address) { + // Total supply equals sum of all balances + invariant_t supply_inv; + supply_inv.init(); + supply_inv.type = InvariantType::TOTAL_SUPPLY_CONSERVED; + supply_inv.target_address = token_address; + snprintf(supply_inv.description, sizeof(supply_inv.description), + "ERC20: Total supply must equal sum of balances"); + add_invariant(supply_inv); + + // Balance cannot exceed total supply + invariant_t balance_inv; + balance_inv.init(); + balance_inv.type = InvariantType::STORAGE_LESS_THAN; + balance_inv.target_address = token_address; + snprintf(balance_inv.description, sizeof(balance_inv.description), + "ERC20: Individual balance cannot exceed total supply"); + add_invariant(balance_inv); +} + +__host__ void InvariantChecker::add_erc721_invariants(const evm_word_t& token_address) { + // Each token has exactly one owner + invariant_t owner_inv; + owner_inv.init(); + owner_inv.type = InvariantType::STORAGE_NOT_ZERO; + owner_inv.target_address = token_address; + snprintf(owner_inv.description, sizeof(owner_inv.description), + "ERC721: Each minted token must have an owner"); + add_invariant(owner_inv); +} + +__host__ void InvariantChecker::add_erc4626_invariants(const evm_word_t& vault_address) { + // Asset/share ratio invariant + invariant_t ratio_inv; + ratio_inv.init(); + ratio_inv.type = InvariantType::ERC4626_ASSET_SHARE_RATIO; + ratio_inv.target_address = vault_address; + snprintf(ratio_inv.description, sizeof(ratio_inv.description), + "ERC4626: Asset/share ratio must be maintained"); + add_invariant(ratio_inv); +} + +__host__ void InvariantChecker::add_amm_invariants(const evm_word_t& pool_address) { + // Constant product invariant + invariant_t k_inv; + k_inv.init(); + k_inv.type = InvariantType::AMM_K_CONSERVED; + k_inv.target_address = pool_address; + 
snprintf(k_inv.description, sizeof(k_inv.description),
+             "AMM: Constant product k must be maintained (x * y >= k)");
+    add_invariant(k_inv);
+}
+
+__host__ void InvariantChecker::add_lending_invariants(const evm_word_t& protocol_address) {
+    // Collateral ratio invariant
+    invariant_t collateral_inv;
+    collateral_inv.init();
+    collateral_inv.type = InvariantType::LENDING_COLLATERAL_RATIO;
+    collateral_inv.target_address = protocol_address;
+    snprintf(collateral_inv.description, sizeof(collateral_inv.description),
+             "Lending: Collateral ratio must be maintained");
+    add_invariant(collateral_inv);
+}
+
+__host__ void InvariantChecker::load_from_json(const char* filename) {
+    FILE* f = fopen(filename, "r");
+    if (!f) return;
+
+    // Simple JSON parsing for invariants
+    char line[512];
+    invariant_t current_inv;
+    current_inv.init();
+    bool in_invariant = false;
+
+    while (fgets(line, sizeof(line), f)) {
+        // Very basic parsing
+        if (strstr(line, "\"type\":")) {
+            char* type_str = strstr(line, ":");
+            if (type_str) {
+                int type_val = atoi(type_str + 1);
+                current_inv.type = static_cast<InvariantType>(type_val);
+            }
+        } else if (strstr(line, "\"description\":")) {
+            char* desc_start = strchr(line, '"');
+            if (desc_start) {
+                desc_start = strchr(desc_start + 1, '"');
+                if (desc_start) {
+                    desc_start++;
+                    char* desc_end = strchr(desc_start, '"');
+                    if (desc_end) {
+                        size_t len = desc_end - desc_start;
+                        if (len >= sizeof(current_inv.description)) {
+                            len = sizeof(current_inv.description) - 1;
+                        }
+                        strncpy(current_inv.description, desc_start, len);
+                        current_inv.description[len] = '\0';
+                    }
+                }
+            }
+        } else if (strstr(line, "\"enabled\":")) {
+            current_inv.enabled = strstr(line, "true") != nullptr;
+        } else if (strstr(line, "}")) {
+            // End of invariant object
+            if (current_inv.type != InvariantType::STORAGE_EQUALS || current_inv.description[0] != '\0') {
+                add_invariant(current_inv);
+                current_inv.init();
+            }
+        }
+    }
+
+    fclose(f);
+}
+
+__host__ void 
InvariantChecker::save_to_json(const char* filename) { + FILE* f = fopen(filename, "w"); + if (!f) return; + + fprintf(f, "{\n \"invariants\": [\n"); + + for (uint32_t i = 0; i < num_invariants_; i++) { + const invariant_t& inv = invariants_[i]; + fprintf(f, " {\n"); + fprintf(f, " \"id\": %u,\n", inv.id); + fprintf(f, " \"type\": %d,\n", static_cast(inv.type)); + fprintf(f, " \"description\": \"%s\",\n", inv.description); + fprintf(f, " \"enabled\": %s,\n", inv.enabled ? "true" : "false"); + fprintf(f, " \"violation_count\": %u\n", inv.violation_count); + fprintf(f, " }%s\n", (i < num_invariants_ - 1) ? "," : ""); + } + + fprintf(f, " ]\n}\n"); + fclose(f); +} + +__host__ __device__ uint32_t InvariantChecker::get_violation_count(uint32_t id) { + if (id >= num_invariants_) return 0; + return invariants_[id].violation_count; +} + +__host__ __device__ uint32_t InvariantChecker::get_total_violations() { + uint32_t total = 0; + for (uint32_t i = 0; i < num_invariants_; i++) { + total += invariants_[i].violation_count; + } + return total; +} + +// ============================================================================ +// CUDA Kernels +// ============================================================================ + +__global__ void kernel_select_seeds( + seed_entry_t* seeds, + uint32_t num_seeds, + uint32_t* selected_indices, + uint32_t num_to_select, + curandState* rng_states +) { + uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_to_select) return; + + curandState local_state = rng_states[idx]; + + // Weighted selection + uint64_t total_energy = 0; + for (uint32_t i = 0; i < num_seeds; i++) { + total_energy += seeds[i].metadata.energy; + } + + if (total_energy == 0) { + // Uniform selection + selected_indices[idx] = curand(&local_state) % num_seeds; + } else { + // Weighted selection + uint64_t target = curand(&local_state) % total_energy; + uint64_t cumulative = 0; + + for (uint32_t i = 0; i < num_seeds; i++) { + cumulative += 
seeds[i].metadata.energy; + if (cumulative > target) { + selected_indices[idx] = i; + break; + } + } + } + + rng_states[idx] = local_state; +} + +__global__ void kernel_update_energies( + seed_entry_t* seeds, + uint32_t num_seeds, + float decay_factor +) { + uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_seeds) return; + + seed_entry_t& seed = seeds[idx]; + + // Apply decay + float new_energy = seed.metadata.energy / decay_factor; + if (new_energy < ENERGY_MIN) { + new_energy = ENERGY_MIN; + } + seed.metadata.energy = static_cast(new_energy); + + // Recalculate priority + float priority = 1.0f; + priority += seed.metadata.coverage_contribution * 10.0f; + priority += seed.metadata.bug_count * 100.0f; + if (seed.metadata.mutation_count > 1000) { + priority *= 0.5f; + } + seed.metadata.priority = static_cast(priority); +} + +__global__ void kernel_check_invariants( + InvariantChecker* checker, + const evm_word_t* storages, + const evm_word_t* balances, + uint32_t num_instances, + invariant_result_t* results, + uint32_t* violation_counts +) { + uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_instances) return; + + // Each instance has its own storage/balance state + const evm_word_t* instance_storage = storages + idx * 1024; // Assume 1024 slots per instance + const evm_word_t* instance_balances = balances + idx * 256; // Assume 256 addresses per instance + + // Results for this instance + invariant_result_t* instance_results = results + idx * MAX_INVARIANTS; + uint32_t violations = 0; + + checker->check_all(instance_storage, instance_balances, idx, instance_results, &violations); + violation_counts[idx] = violations; +} + +__global__ void kernel_compute_coverage_hashes( + const coverage_snapshot_t* snapshots, + uint32_t num_snapshots, + uint32_t* hashes +) { + uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_snapshots) return; + + const coverage_snapshot_t& snapshot = snapshots[idx]; + + // FNV-1a 
hash of coverage bitmap + uint32_t hash = 2166136261u; + for (uint32_t i = 0; i < COVERAGE_MAP_SIZE / 32; i++) { + hash ^= snapshot.edge_bitmap[i]; + hash *= 16777619u; + } + + hashes[idx] = hash; +} + +// ============================================================================ +// Host Helper Functions +// ============================================================================ + +__host__ GPUCorpusManager* allocate_corpus_manager(uint32_t max_size) { + GPUCorpusManager* manager; + cudaMallocManaged(&manager, sizeof(GPUCorpusManager)); + new (manager) GPUCorpusManager(max_size); + return manager; +} + +__host__ void free_corpus_manager(GPUCorpusManager* manager) { + if (manager) { + manager->~GPUCorpusManager(); + cudaFree(manager); + } +} + +__host__ InvariantChecker* allocate_invariant_checker() { + InvariantChecker* checker; + cudaMallocManaged(&checker, sizeof(InvariantChecker)); + new (checker) InvariantChecker(); + return checker; +} + +__host__ void free_invariant_checker(InvariantChecker* checker) { + if (checker) { + checker->~InvariantChecker(); + cudaFree(checker); + } +} + +__host__ void generate_initial_corpus(GPUCorpusManager* corpus, + const uint8_t* contract_abi, + uint32_t abi_length) { + if (!corpus || !contract_abi || abi_length == 0) return; + + // Parse ABI to find function selectors + // This is a simplified implementation - real version would parse JSON ABI + + // Common function selectors for testing + uint8_t selectors[][4] = { + {0xa9, 0x05, 0x9c, 0xbb}, // transfer(address,uint256) + {0x23, 0xb8, 0x72, 0xdd}, // transferFrom(address,address,uint256) + {0x09, 0x5e, 0xa7, 0xb3}, // approve(address,uint256) + {0x70, 0xa0, 0x82, 0x31}, // balanceOf(address) + {0x18, 0x16, 0x0d, 0xdd}, // totalSupply() + {0xdd, 0x62, 0xed, 0x3e}, // allowance(address,address) + {0x40, 0xc1, 0x0f, 0x19}, // mint(address,uint256) + {0x42, 0x96, 0x6c, 0x68}, // burn(uint256) + }; + + // Create initial seeds for each function + for (size_t i = 0; i < 
sizeof(selectors) / sizeof(selectors[0]); i++) { + seed_entry_t seed; + seed.init(); + + // Create minimal calldata with selector and zero args + uint32_t calldata_len = 4 + 64; // Selector + 2 args + uint8_t* calldata; + cudaMallocManaged(&calldata, calldata_len); + memset(calldata, 0, calldata_len); + memcpy(calldata, selectors[i], 4); + + seed.data.data = calldata; + seed.data.length = calldata_len; + seed.data.capacity = calldata_len; + seed.num_transactions = 1; + seed.tx_offsets[0] = 0; + seed.tx_lengths[0] = calldata_len; + + corpus->add_seed(seed, false); + } + + // Add edge case seeds + // Empty calldata + { + seed_entry_t seed; + seed.init(); + uint8_t* calldata; + cudaMallocManaged(&calldata, 4); + memset(calldata, 0, 4); + seed.data.data = calldata; + seed.data.length = 4; + seed.data.capacity = 4; + seed.num_transactions = 1; + seed.tx_offsets[0] = 0; + seed.tx_lengths[0] = 4; + corpus->add_seed(seed, false); + } + + // Random selector + { + seed_entry_t seed; + seed.init(); + uint8_t* calldata; + cudaMallocManaged(&calldata, 4); + calldata[0] = 0xDE; + calldata[1] = 0xAD; + calldata[2] = 0xBE; + calldata[3] = 0xEF; + seed.data.data = calldata; + seed.data.length = 4; + seed.data.capacity = 4; + seed.num_transactions = 1; + seed.tx_offsets[0] = 0; + seed.tx_lengths[0] = 4; + corpus->add_seed(seed, false); + } +} + +} // namespace fuzzing +} // namespace CuEVM diff --git a/CuEVM/src/fuzzing/coverage.cu b/CuEVM/src/fuzzing/coverage.cu new file mode 100644 index 0000000..6f322c5 --- /dev/null +++ b/CuEVM/src/fuzzing/coverage.cu @@ -0,0 +1,720 @@ +// CuEVM: CUDA Ethereum Virtual Machine implementation +// GPU Coverage Instrumentation Implementation for NVIDIA B300 +// SPDX-License-Identifier: MIT + +#include +#include +#include + +namespace CuEVM { +namespace fuzzing { + +// ============================================================================ +// GPU Coverage Map Implementation +// 
// ============================================================================
// GPU Coverage Map Implementation
// ============================================================================

// Saturating increment of one 8-bit AFL-style hit counter.
//
// FIX: CUDA provides no byte-wide atomicAdd overload, so the original
// atomicAdd((unsigned char*)&bitmap[i], 1) pattern was invalid. On the device
// we emulate it with an atomicCAS loop on the aligned 32-bit word that
// contains the counter. Both paths now saturate at 255 (the host path already
// did; the device path previously wrapped).
__host__ __device__ static void coverage_counter_inc(coverage_counter_t* counters,
                                                     uint32_t index) {
#ifdef __CUDA_ARCH__
    uintptr_t addr = (uintptr_t)&counters[index];
    unsigned int* word_ptr = (unsigned int*)(addr & ~(uintptr_t)3u);
    unsigned int shift = (unsigned int)(addr & 3u) * 8u;
    unsigned int observed = *word_ptr;
    unsigned int assumed;
    do {
        assumed = observed;
        unsigned int byte = (assumed >> shift) & 0xFFu;
        if (byte == 0xFFu) return;  // already saturated — nothing to do
        unsigned int next = (assumed & ~(0xFFu << shift)) | ((byte + 1u) << shift);
        observed = atomicCAS(word_ptr, assumed, next);
    } while (observed != assumed);
#else
    if (counters[index] < 255) counters[index]++;
#endif
}

// Add `delta` to a 64-bit statistic, atomically on the device.
__host__ __device__ static void stat_add_u64(uint64_t* stat, uint64_t delta) {
#ifdef __CUDA_ARCH__
    atomicAdd((unsigned long long*)stat, (unsigned long long)delta);
#else
    *stat += delta;
#endif
}

// Zero all scalar bookkeeping. Bitmap buffers are owned by the allocator and
// are deliberately left untouched (see reset()).
__host__ __device__ void gpu_coverage_map_t::init() {
    num_branch_entries = 0;
    num_storage_entries = 0;
    num_call_entries = 0;
    num_contracts = 0;
    total_instructions_executed = 0;
    total_branches_executed = 0;
    total_storage_ops = 0;
    total_calls = 0;
    total_gas_used = 0;
    unique_pcs = 0;
    unique_edges = 0;
    unique_branches = 0;
    overall_coverage = 0.0f;
}

// Clear every hit-count bitmap that has been allocated, then re-init scalars.
__host__ __device__ void gpu_coverage_map_t::reset() {
    if (pc_bitmap) {
        for (uint32_t i = 0; i < PC_COVERAGE_SIZE; i++) pc_bitmap[i] = 0;
    }
    if (edge_bitmap) {
        for (uint32_t i = 0; i < EDGE_COVERAGE_SIZE; i++) edge_bitmap[i] = 0;
    }
    if (opcode_counters) {
        for (uint32_t i = 0; i < OPCODE_COVERAGE_SIZE; i++) opcode_counters[i] = 0;
    }
    init();
}

// Fold `other` into this map: counters are merged with saturating addition,
// scalar statistics are summed. Bitmaps that are not allocated on either side
// are skipped (defensive — the original dereferenced unconditionally).
__host__ __device__ void gpu_coverage_map_t::merge(const gpu_coverage_map_t& other) {
    if (pc_bitmap && other.pc_bitmap) {
        for (uint32_t i = 0; i < PC_COVERAGE_SIZE; i++) {
            uint16_t sum = (uint16_t)pc_bitmap[i] + (uint16_t)other.pc_bitmap[i];
            pc_bitmap[i] = (sum > 255) ? 255 : (coverage_counter_t)sum;
        }
    }
    if (edge_bitmap && other.edge_bitmap) {
        for (uint32_t i = 0; i < EDGE_COVERAGE_SIZE; i++) {
            uint16_t sum = (uint16_t)edge_bitmap[i] + (uint16_t)other.edge_bitmap[i];
            edge_bitmap[i] = (sum > 255) ? 255 : (coverage_counter_t)sum;
        }
    }
    if (opcode_counters && other.opcode_counters) {
        for (uint32_t i = 0; i < OPCODE_COVERAGE_SIZE; i++) {
            uint16_t sum = (uint16_t)opcode_counters[i] + (uint16_t)other.opcode_counters[i];
            opcode_counters[i] = (sum > 255) ? 255 : (coverage_counter_t)sum;
        }
    }

    total_instructions_executed += other.total_instructions_executed;
    total_branches_executed += other.total_branches_executed;
    total_storage_ops += other.total_storage_ops;
    total_calls += other.total_calls;
    total_gas_used += other.total_gas_used;
}

// ============================================================================
// Instance Coverage Implementation
// ============================================================================

// Reset all per-instance counters and ring-buffer hash slots to zero.
__host__ __device__ void instance_coverage_t::init() {
    edge_hash_idx = 0;
    branch_hash_idx = 0;
    storage_hash_idx = 0;
    pcs_hit = 0;
    edges_hit = 0;
    branches_taken = 0;
    storage_ops = 0;
    calls_made = 0;
    last_pc = 0;
    last_opcode = 0;

    for (int i = 0; i < 256; i++) edge_hashes[i] = 0;
    for (int i = 0; i < 64; i++) branch_hashes[i] = 0;
    for (int i = 0; i < 64; i++) storage_hashes[i] = 0;
}

// Count an executed PC and remember it for edge formation.
__host__ __device__ void instance_coverage_t::record_pc(uint32_t pc) {
    pcs_hit++;
    last_pc = pc;
}

// Record a control-flow edge using AFL-style (prev>>1)^cur hashing; the
// hash is kept in a 256-entry ring buffer for later merging into the
// global edge bitmap.
__host__ __device__ void instance_coverage_t::record_edge(uint32_t from_pc, uint32_t to_pc) {
    uint32_t hash = (from_pc >> 1) ^ to_pc;
    edge_hashes[edge_hash_idx & 255] = hash;
    edge_hash_idx++;
    edges_hit++;
}

// Record a conditional-branch outcome. `distance` is accepted for interface
// stability but not stored per-branch here (distance buckets live in the
// global branch entries).
__host__ __device__ void instance_coverage_t::record_branch(uint32_t pc, bool taken,
                                                            uint64_t distance) {
    (void)distance;
    uint32_t hash = pc | (taken ? 0x80000000u : 0u);
    branch_hashes[branch_hash_idx & 63] = hash;
    branch_hash_idx++;
    branches_taken++;
}

// Record an SLOAD/SSTORE site keyed by (pc, slot hash, read/write bit).
__host__ __device__ void instance_coverage_t::record_storage(uint32_t pc, uint32_t slot_hash,
                                                             bool is_write) {
    uint32_t hash = (pc << 16) ^ slot_hash ^ (is_write ? 0x1u : 0x0u);
    storage_hashes[storage_hash_idx & 63] = hash;
    storage_hash_idx++;
    storage_ops++;
}

// Count a CALL-family instruction (details are tracked globally).
__host__ __device__ void instance_coverage_t::record_call(uint32_t pc, uint32_t target_hash,
                                                          uint8_t opcode, bool success) {
    (void)pc; (void)target_hash; (void)opcode; (void)success;
    calls_made++;
}

// ============================================================================
// Coverage Instrumentation Implementation
// ============================================================================

__host__ __device__ CoverageInstrumentation::CoverageInstrumentation(
    gpu_coverage_map_t* global_map, instance_coverage_t* instance)
    : global_map_(global_map), instance_(instance) {}

// Hook: before an instruction executes. Forms the edge last_pc -> pc.
__host__ __device__ void CoverageInstrumentation::on_instruction_start(uint32_t pc,
                                                                       uint8_t opcode) {
    instance_->record_pc(pc);

    // last_pc == 0 doubles as "no previous instruction"; PC 0 edges from the
    // entry point are therefore not recorded — preserved from the original.
    if (instance_->last_pc != 0) {
        instance_->record_edge(instance_->last_pc, pc);
    }

    instance_->last_opcode = opcode;
}

// Hook: after an instruction executes; bumps the global instruction count.
__host__ __device__ void CoverageInstrumentation::on_instruction_end(uint32_t pc,
                                                                     uint8_t opcode,
                                                                     uint32_t error_code) {
    (void)opcode; (void)error_code;
    instance_->last_pc = pc;
    stat_add_u64(&global_map_->total_instructions_executed, 1);
}

// Hook: unconditional JUMP — record the edge per-instance and globally.
__host__ __device__ void CoverageInstrumentation::on_jump(uint32_t from_pc, uint32_t to_pc) {
    instance_->record_edge(from_pc, to_pc);

    uint32_t edge_hash = hash_edge(from_pc, to_pc);
    coverage_counter_inc(global_map_->edge_bitmap, edge_hash % EDGE_COVERAGE_SIZE);
}

// Hook: conditional JUMPI — record taken/not-taken plus a quantized
// branch distance for gradient-guided fuzzing.
__host__ __device__ void CoverageInstrumentation::on_jumpi(uint32_t pc, uint32_t target,
                                                           bool taken,
                                                           const evm_word_t& condition) {
    uint64_t distance = compute_branch_distance(condition);
    instance_->record_branch(pc, taken, distance);

    stat_add_u64(&global_map_->total_branches_executed, 1);

    // Reserve a detailed entry slot; the index keeps growing past capacity
    // so overflow simply drops entries rather than corrupting earlier ones.
    uint32_t entry_idx;
#ifdef __CUDA_ARCH__
    entry_idx = atomicAdd(&global_map_->num_branch_entries, 1u);
#else
    entry_idx = global_map_->num_branch_entries++;
#endif

    if (entry_idx < BRANCH_COVERAGE_SIZE) {
        branch_coverage_entry_t* entry = &global_map_->branch_entries[entry_idx];
        entry->pc = pc;
        entry->distance_bucket = quantize_distance(distance);
        if (taken) {
            entry->taken_true = 1;
            entry->true_target = target;
        } else {
            entry->taken_false = 1;
            entry->false_target = target;
        }
        // min_distance == 0 means "unset" (entries are zero-initialized), so
        // a genuine zero distance is indistinguishable — preserved behavior.
        if (entry->min_distance == 0 || distance < entry->min_distance) {
            entry->min_distance = distance;
        }
    }
}

// Hook: SLOAD — count the read per-instance and globally.
__host__ __device__ void CoverageInstrumentation::on_sload(uint32_t pc, const evm_word_t& slot,
                                                           bool warm) {
    (void)warm;
    uint32_t slot_hash = hash_slot(slot);
    instance_->record_storage(pc, slot_hash, false);
    stat_add_u64(&global_map_->total_storage_ops, 1);
}

// Hook: SSTORE — count the write and record whether the value changed.
__host__ __device__ void CoverageInstrumentation::on_sstore(uint32_t pc, const evm_word_t& slot,
                                                            const evm_word_t& old_value,
                                                            const evm_word_t& new_value) {
    uint32_t slot_hash = hash_slot(slot);
    instance_->record_storage(pc, slot_hash, true);
    stat_add_u64(&global_map_->total_storage_ops, 1);

    uint32_t entry_idx;
#ifdef __CUDA_ARCH__
    entry_idx = atomicAdd(&global_map_->num_storage_entries, 1u);
#else
    entry_idx = global_map_->num_storage_entries++;
#endif

    if (entry_idx < STORAGE_COVERAGE_SIZE) {
        storage_coverage_entry_t* entry = &global_map_->storage_entries[entry_idx];
        entry->pc = pc;
        entry->slot_hash = slot_hash;
        entry->is_read = 0;
        entry->is_write = 1;

        bool changed = false;
        for (int i = 0; i < 8; i++) {
            if (old_value._limbs[i] != new_value._limbs[i]) {
                changed = true;
                break;
            }
        }
        entry->value_changed = changed ? 1 : 0;
    }
}

// Hook: CALL/DELEGATECALL/etc. — record callee, precompile flag, value flag.
__host__ __device__ void CoverageInstrumentation::on_call(uint32_t pc, uint8_t opcode,
                                                          const evm_word_t& target,
                                                          const evm_word_t& value,
                                                          bool success) {
    uint32_t target_hash = hash_slot(target);
    instance_->record_call(pc, target_hash, opcode, success);
    stat_add_u64(&global_map_->total_calls, 1);

    uint32_t entry_idx;
#ifdef __CUDA_ARCH__
    entry_idx = atomicAdd(&global_map_->num_call_entries, 1u);
#else
    entry_idx = global_map_->num_call_entries++;
#endif

    if (entry_idx < CALL_COVERAGE_SIZE) {
        call_coverage_entry_t* entry = &global_map_->call_entries[entry_idx];
        entry->pc = pc;
        entry->opcode = opcode;
        entry->callee_address_hash = target_hash;
        entry->success = success ? 1 : 0;

        // Precompile test (addresses 0x01..0x09): assumes _limbs[0] holds
        // the least-significant 32 bits — TODO confirm limb endianness
        // against evm_word_t's definition.
        bool is_precompile = true;
        for (int i = 1; i < 8; i++) {
            if (target._limbs[i] != 0) {
                is_precompile = false;
                break;
            }
        }
        entry->is_precompile =
            (is_precompile && target._limbs[0] >= 1 && target._limbs[0] <= 9) ? 1 : 0;

        bool has_value = false;
        for (int i = 0; i < 8; i++) {
            if (value._limbs[i] != 0) {
                has_value = true;
                break;
            }
        }
        entry->value_transferred = has_value ? 1 : 0;
    }
}

// Hook: memory read/write — hash-based tracking into the PC bitmap
// (offset bucketed to 32-byte granularity).
__host__ __device__ void CoverageInstrumentation::on_memory_access(uint32_t pc, uint32_t offset,
                                                                  uint32_t size, bool is_write) {
    (void)size;
    uint32_t mem_hash = (pc << 16) ^ (offset >> 5) ^ (is_write ? 0x1u : 0x0u);
    coverage_counter_inc(global_map_->pc_bitmap, mem_hash % PC_COVERAGE_SIZE);
}

// Hook: comparison opcode — records a quantized operand distance so the
// fuzzer can sense how close it is to flipping the comparison.
__host__ __device__ void CoverageInstrumentation::on_comparison(uint32_t pc, uint8_t opcode,
                                                                const evm_word_t& a,
                                                                const evm_word_t& b,
                                                                const evm_word_t& result) {
    (void)result;

    // Only the low 64 bits of each operand feed the distance metric —
    // assumes 32-bit little-endian limbs (TODO confirm), and is a
    // deliberate approximation for speed.
    uint64_t a_val = 0, b_val = 0;
    for (int i = 0; i < 2; i++) {
        a_val |= ((uint64_t)a._limbs[i] << (i * 32));
        b_val |= ((uint64_t)b._limbs[i] << (i * 32));
    }
    uint64_t distance = (a_val > b_val) ? (a_val - b_val) : (b_val - a_val);

    uint8_t bucket = quantize_distance(distance);
    uint32_t comp_hash = (pc << 8) ^ opcode ^ bucket;
    coverage_counter_inc(global_map_->edge_bitmap, comp_hash % EDGE_COVERAGE_SIZE);
}

// Hook: RETURN/REVERT — track (pc, outcome, size) patterns in the PC bitmap.
__host__ __device__ void CoverageInstrumentation::on_return(uint32_t pc, bool success,
                                                            uint32_t return_size) {
    uint32_t ret_hash = (pc << 1) ^ (success ? 1u : 0u) ^ (return_size & 0xFFFFu);
    coverage_counter_inc(global_map_->pc_bitmap, ret_hash % PC_COVERAGE_SIZE);
}

// Flush this instance's buffered edge hashes into the global edge bitmap.
__host__ __device__ void CoverageInstrumentation::finalize() {
    for (uint32_t i = 0; i < instance_->edge_hash_idx && i < 256; i++) {
        uint32_t hash = instance_->edge_hashes[i];
        coverage_counter_inc(global_map_->edge_bitmap, hash % EDGE_COVERAGE_SIZE);
    }
    // PCs are tracked via hashing elsewhere; no per-PC flush is needed here.
}

// AFL-style edge hash, masked to the bitmap size (power of two assumed).
__host__ __device__ uint32_t CoverageInstrumentation::hash_edge(uint32_t from, uint32_t to) {
    return ((from >> 1) ^ to) & (EDGE_COVERAGE_SIZE - 1);
}

// Cheap rotate-xor hash of a 256-bit storage slot / address.
__host__ __device__ uint32_t CoverageInstrumentation::hash_slot(const evm_word_t& slot) {
    uint32_t hash = 0;
    for (int i = 0; i < 8; i++) {
        hash ^= slot._limbs[i];
        hash = (hash << 5) | (hash >> 27);  // rotate left by 5
    }
    return hash;
}

// Map a raw distance onto one of DISTANCE_BUCKETS coarse buckets.
__host__ __device__ uint8_t CoverageInstrumentation::quantize_distance(uint64_t distance) {
    for (uint8_t i = 0; i < DISTANCE_BUCKETS; i++) {
        if (distance <= DISTANCE_THRESHOLDS[i]) return i;
    }
    return DISTANCE_BUCKETS - 1;
}

// Distance-to-zero of the branch condition (low 64 bits only).
__host__ __device__ uint64_t CoverageInstrumentation::compute_branch_distance(
    const evm_word_t& condition) {
    uint64_t distance = 0;
    for (int i = 0; i < 2; i++) {
        distance |= ((uint64_t)condition._limbs[i] << (i * 32));
    }
    return distance;
}

// ============================================================================
// Coverage Map Allocator
// Implementation
// ============================================================================

// Allocate a fully-populated global coverage map in CUDA unified memory.
// All counter arrays are zeroed; virgin bits start all-set (0xFF = untouched).
// FIX: the original assigned num_contracts *before* calling init(), which
// zeroes it — the value was silently lost. It also left contract_coverage
// uninitialized; both are corrected here.
__host__ gpu_coverage_map_t* CoverageMapAllocator::allocate_global(uint32_t num_contracts) {
    gpu_coverage_map_t* map = nullptr;

    cudaMallocManaged(&map, sizeof(gpu_coverage_map_t));
    cudaMallocManaged(&map->pc_bitmap, PC_COVERAGE_SIZE * sizeof(coverage_counter_t));
    cudaMallocManaged(&map->edge_bitmap, EDGE_COVERAGE_SIZE * sizeof(coverage_counter_t));
    cudaMallocManaged(&map->opcode_counters, OPCODE_COVERAGE_SIZE * sizeof(coverage_counter_t));
    cudaMallocManaged(&map->branch_entries, BRANCH_COVERAGE_SIZE * sizeof(branch_coverage_entry_t));
    cudaMallocManaged(&map->storage_entries, STORAGE_COVERAGE_SIZE * sizeof(storage_coverage_entry_t));
    cudaMallocManaged(&map->call_entries, CALL_COVERAGE_SIZE * sizeof(call_coverage_entry_t));
    cudaMallocManaged(&map->opcode_stats, OPCODE_COVERAGE_SIZE * sizeof(opcode_stats_t));
    cudaMallocManaged(&map->contract_coverage, num_contracts * sizeof(contract_coverage_t));
    cudaMallocManaged(&map->virgin_bits, (COVERAGE_MAP_SIZE / 32) * sizeof(coverage_bitmap_t));

    cudaMemset(map->pc_bitmap, 0, PC_COVERAGE_SIZE * sizeof(coverage_counter_t));
    cudaMemset(map->edge_bitmap, 0, EDGE_COVERAGE_SIZE * sizeof(coverage_counter_t));
    cudaMemset(map->opcode_counters, 0, OPCODE_COVERAGE_SIZE * sizeof(coverage_counter_t));
    cudaMemset(map->branch_entries, 0, BRANCH_COVERAGE_SIZE * sizeof(branch_coverage_entry_t));
    cudaMemset(map->storage_entries, 0, STORAGE_COVERAGE_SIZE * sizeof(storage_coverage_entry_t));
    cudaMemset(map->call_entries, 0, CALL_COVERAGE_SIZE * sizeof(call_coverage_entry_t));
    cudaMemset(map->opcode_stats, 0, OPCODE_COVERAGE_SIZE * sizeof(opcode_stats_t));
    cudaMemset(map->contract_coverage, 0, num_contracts * sizeof(contract_coverage_t));
    cudaMemset(map->virgin_bits, 0xFF, (COVERAGE_MAP_SIZE / 32) * sizeof(coverage_bitmap_t));

    map->init();                        // zeroes scalars, including num_contracts
    map->num_contracts = num_contracts; // so set the real value afterwards

    return map;
}

// Allocate and zero-initialize per-instance coverage records.
__host__ instance_coverage_t* CoverageMapAllocator::allocate_instances(uint32_t num_instances) {
    instance_coverage_t* instances = nullptr;
    cudaMallocManaged(&instances, num_instances * sizeof(instance_coverage_t));

    for (uint32_t i = 0; i < num_instances; i++) {
        instances[i].init();
    }
    return instances;
}

// Release a global map and every buffer it owns.
__host__ void CoverageMapAllocator::free_global(gpu_coverage_map_t* map) {
    if (!map) return;
    cudaFree(map->pc_bitmap);
    cudaFree(map->edge_bitmap);
    cudaFree(map->opcode_counters);
    cudaFree(map->branch_entries);
    cudaFree(map->storage_entries);
    cudaFree(map->call_entries);
    cudaFree(map->opcode_stats);
    cudaFree(map->contract_coverage);
    cudaFree(map->virgin_bits);
    cudaFree(map);
}

__host__ void CoverageMapAllocator::free_instances(instance_coverage_t* instances) {
    if (instances) cudaFree(instances);
}

// Allocate a pinned-host staging map holding only the PC and edge bitmaps
// (the two buffers copy_to_host() transfers).
__host__ gpu_coverage_map_t* CoverageMapAllocator::allocate_pinned() {
    gpu_coverage_map_t* map = nullptr;
    cudaMallocHost(&map, sizeof(gpu_coverage_map_t));
    cudaMallocHost(&map->pc_bitmap, PC_COVERAGE_SIZE * sizeof(coverage_counter_t));
    cudaMallocHost(&map->edge_bitmap, EDGE_COVERAGE_SIZE * sizeof(coverage_counter_t));
    return map;
}

// Copy the device map's scalars and bitmaps into a pinned host map.
// FIX: the original struct-copy overwrote host_map's pc_bitmap/edge_bitmap
// pointers with the *device* addresses and then memcpy'd into them. The host
// buffer pointers are now preserved across the struct copy.
__host__ void CoverageMapAllocator::copy_to_host(gpu_coverage_map_t* host_map,
                                                 const gpu_coverage_map_t* device_map) {
    coverage_counter_t* host_pc = host_map->pc_bitmap;
    coverage_counter_t* host_edge = host_map->edge_bitmap;

    cudaMemcpy(host_map, device_map, sizeof(gpu_coverage_map_t), cudaMemcpyDeviceToHost);
    host_map->pc_bitmap = host_pc;
    host_map->edge_bitmap = host_edge;

    // device_map lives in managed memory, so its pointer fields are readable
    // here — TODO confirm this holds for every allocation path.
    cudaMemcpy(host_pc, device_map->pc_bitmap,
               PC_COVERAGE_SIZE * sizeof(coverage_counter_t), cudaMemcpyDeviceToHost);
    cudaMemcpy(host_edge, device_map->edge_bitmap,
               EDGE_COVERAGE_SIZE * sizeof(coverage_counter_t), cudaMemcpyDeviceToHost);
}

// ============================================================================
// Coverage Snapshot Implementation
// ============================================================================

// Flat binary layout: scalars, then bitmap sizes, then the two bitmaps.
// `*size` receives the number of bytes written; the caller provides a buffer
// large enough for the fixed header plus both bitmaps.
// FIX: restored `&timestamp` (the source had the mojibake "×tamp").
__host__ void coverage_snapshot_t::serialize(void* buffer, size_t* size) {
    uint8_t* ptr = (uint8_t*)buffer;

    memcpy(ptr, &unique_pcs, sizeof(uint32_t)); ptr += sizeof(uint32_t);
    memcpy(ptr, &unique_edges, sizeof(uint32_t)); ptr += sizeof(uint32_t);
    memcpy(ptr, &unique_branches, sizeof(uint32_t)); ptr += sizeof(uint32_t);
    memcpy(ptr, &coverage_score, sizeof(float)); ptr += sizeof(float);
    memcpy(ptr, &timestamp, sizeof(uint64_t)); ptr += sizeof(uint64_t);

    memcpy(ptr, &pc_bitmap_size, sizeof(uint32_t)); ptr += sizeof(uint32_t);
    memcpy(ptr, &edge_bitmap_size, sizeof(uint32_t)); ptr += sizeof(uint32_t);

    memcpy(ptr, pc_bitmap_data, pc_bitmap_size); ptr += pc_bitmap_size;
    memcpy(ptr, edge_bitmap_data, edge_bitmap_size); ptr += edge_bitmap_size;

    *size = (size_t)(ptr - (uint8_t*)buffer);
}

// Inverse of serialize(); the bitmaps are heap-copied (caller frees them).
__host__ coverage_snapshot_t coverage_snapshot_t::deserialize(const void* buffer, size_t size) {
    (void)size;  // trusted input; layout is fixed — TODO add bounds checks
    coverage_snapshot_t snapshot;
    const uint8_t* ptr = (const uint8_t*)buffer;

    memcpy(&snapshot.unique_pcs, ptr, sizeof(uint32_t)); ptr += sizeof(uint32_t);
    memcpy(&snapshot.unique_edges, ptr, sizeof(uint32_t)); ptr += sizeof(uint32_t);
    memcpy(&snapshot.unique_branches, ptr, sizeof(uint32_t)); ptr += sizeof(uint32_t);
    memcpy(&snapshot.coverage_score, ptr, sizeof(float)); ptr += sizeof(float);
    memcpy(&snapshot.timestamp, ptr, sizeof(uint64_t)); ptr += sizeof(uint64_t);

    memcpy(&snapshot.pc_bitmap_size, ptr, sizeof(uint32_t)); ptr += sizeof(uint32_t);
    memcpy(&snapshot.edge_bitmap_size, ptr, sizeof(uint32_t)); ptr += sizeof(uint32_t);

    snapshot.pc_bitmap_data = (uint8_t*)malloc(snapshot.pc_bitmap_size);
    snapshot.edge_bitmap_data = (uint8_t*)malloc(snapshot.edge_bitmap_size);

    memcpy(snapshot.pc_bitmap_data, ptr, snapshot.pc_bitmap_size); ptr += snapshot.pc_bitmap_size;
    memcpy(snapshot.edge_bitmap_data, ptr, snapshot.edge_bitmap_size);

    return snapshot;
}

// True when any uniqueness metric strictly exceeds the baseline.
__host__ bool coverage_snapshot_t::has_new_coverage(const coverage_snapshot_t& baseline) {
    return unique_pcs > baseline.unique_pcs ||
           unique_edges > baseline.unique_edges ||
           unique_branches > baseline.unique_branches;
}

// Weighted relative-growth score (branches weighted highest).
// FIX: subtractions are done in signed 64-bit — the original subtracted
// uint32_t values and underflowed to a huge positive "novelty" whenever a
// metric regressed below the baseline.
__host__ float coverage_snapshot_t::novelty_score(const coverage_snapshot_t& baseline) {
    float pc_novelty = (float)((int64_t)unique_pcs - (int64_t)baseline.unique_pcs) /
                       (float)(baseline.unique_pcs + 1);
    float edge_novelty = (float)((int64_t)unique_edges - (int64_t)baseline.unique_edges) /
                         (float)(baseline.unique_edges + 1);
    float branch_novelty = (float)((int64_t)unique_branches - (int64_t)baseline.unique_branches) /
                           (float)(baseline.unique_branches + 1);
    return (pc_novelty + edge_novelty * 2 + branch_novelty * 3) / 6.0f;
}

// ============================================================================
// Bitmap Operations
// ============================================================================

namespace bitmap_ops {

// AFL-style prev/cur edge hash masked to the edge-bitmap size.
__host__ __device__ uint32_t hash_pc(uint32_t pc, uint32_t prev_pc) {
    return ((prev_pc >> 1) ^ pc) & (EDGE_COVERAGE_SIZE - 1);
}

// Saturating increment of one 8-bit counter.
// FIX: CUDA has no byte-wide atomicAdd; the device path now uses an
// atomicCAS loop on the aligned 32-bit word and saturates at 255, matching
// the host path (which previously saturated while the device path wrapped).
__host__ __device__ void increment_counter(coverage_counter_t* bitmap, uint32_t index) {
#ifdef __CUDA_ARCH__
    uintptr_t addr = (uintptr_t)&bitmap[index];
    unsigned int* word_ptr = (unsigned int*)(addr & ~(uintptr_t)3u);
    unsigned int shift = (unsigned int)(addr & 3u) * 8u;
    unsigned int observed = *word_ptr;
    unsigned int assumed;
    do {
        assumed = observed;
        unsigned int byte = (assumed >> shift) & 0xFFu;
        if (byte == 0xFFu) return;  // saturated
        unsigned int next = (assumed & ~(0xFFu << shift)) | ((byte + 1u) << shift);
        observed = atomicCAS(word_ptr, assumed, next);
    } while (observed != assumed);
#else
    if (bitmap[index] < 255) {
        bitmap[index]++;
    }
#endif
}

// A set bit means "still virgin" (never covered).
__host__ __device__ bool check_virgin(coverage_bitmap_t* virgin, uint32_t index) {
    uint32_t word_idx = index / 32;
    uint32_t bit_idx = index % 32;
    return (virgin[word_idx] & (1U << bit_idx)) != 0;
}

// Clear the virgin bit — the location has now been covered.
__host__ __device__ void mark_virgin(coverage_bitmap_t* virgin, uint32_t index) {
    uint32_t word_idx = index / 32;
    uint32_t bit_idx = index % 32;
#ifdef __CUDA_ARCH__
    atomicAnd(&virgin[word_idx], ~(1U << bit_idx));
#else
    virgin[word_idx] &= ~(1U << bit_idx);
#endif
}

// Number of non-zero counters (host-side analysis helper).
__host__ uint32_t count_bits(const coverage_counter_t* bitmap, uint32_t size) {
    uint32_t count = 0;
    for (uint32_t i = 0; i < size; i++) {
        if (bitmap[i] > 0) count++;
    }
    return count;
}

__host__ uint32_t count_nonzero(const coverage_counter_t* bitmap, uint32_t size) {
    return count_bits(bitmap, size);
}

// Element-wise saturating addition of src into dst.
__host__ void merge_bitmaps(coverage_counter_t* dst, const coverage_counter_t* src,
                            uint32_t size) {
    for (uint32_t i = 0; i < size; i++) {
        uint16_t sum = (uint16_t)dst[i] + (uint16_t)src[i];
        dst[i] = (sum > 255) ? 255 : (coverage_counter_t)sum;
    }
}

// True when `current` hit a counter whose virgin byte is still untouched
// (virgin counters are initialized to 0xFF by the allocator).
__host__ bool has_new_bits(const coverage_counter_t* current, const coverage_counter_t* virgin,
                           uint32_t size) {
    for (uint32_t i = 0; i < size; i++) {
        if (current[i] > 0 && virgin[i] == 0xFF) {
            return true;
        }
    }
    return false;
}

}  // namespace bitmap_ops

// ============================================================================
// CUDA Kernels
// ============================================================================

// One thread per instance: fold each instance's buffered edge hashes and
// scalar stats into the global map.
__global__ void kernel_merge_coverage(gpu_coverage_map_t* global_map,
                                      instance_coverage_t* instances,
                                      uint32_t num_instances) {
    uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= num_instances) return;

    instance_coverage_t* inst = &instances[idx];

    for (uint32_t i = 0; i < inst->edge_hash_idx && i < 256; i++) {
        uint32_t index = inst->edge_hashes[i] % EDGE_COVERAGE_SIZE;
        // Byte-safe saturating increment (plain atomicAdd has no u8 overload).
        bitmap_ops::increment_counter(global_map->edge_bitmap, index);
    }

    atomicAdd(&global_map->total_instructions_executed, (unsigned long long)inst->pcs_hit);
    atomicAdd(&global_map->total_branches_executed, (unsigned long long)inst->branches_taken);
    atomicAdd(&global_map->total_storage_ops, (unsigned long long)inst->storage_ops);
    atomicAdd(&global_map->total_calls, (unsigned long long)inst->calls_made);
}

// Single-block reduction: count non-zero PC and edge counters and derive an
// edge-coverage ratio. Launch with one block; thread 0 writes the results.
__global__ void kernel_compute_coverage_stats(gpu_coverage_map_t* map, uint32_t* unique_pcs,
                                              uint32_t* unique_edges, float* coverage_score) {
    __shared__ uint32_t shared_pc_count;
    __shared__ uint32_t shared_edge_count;

    if (threadIdx.x == 0) {
        shared_pc_count = 0;
        shared_edge_count = 0;
    }
    __syncthreads();

    uint32_t local_pc_count = 0;
    for (uint32_t i = threadIdx.x; i < PC_COVERAGE_SIZE; i += blockDim.x) {
        if (map->pc_bitmap[i] > 0) local_pc_count++;
    }
    atomicAdd(&shared_pc_count, local_pc_count);

    uint32_t local_edge_count = 0;
    for (uint32_t i = threadIdx.x; i < EDGE_COVERAGE_SIZE; i += blockDim.x) {
        if (map->edge_bitmap[i] > 0) local_edge_count++;
    }
    atomicAdd(&shared_edge_count, local_edge_count);

    __syncthreads();

    if (threadIdx.x == 0) {
        *unique_pcs = shared_pc_count;
        *unique_edges = shared_edge_count;
        *coverage_score = (float)shared_edge_count / (float)EDGE_COVERAGE_SIZE;
    }
}

// One thread per edge slot: flag edges present in `current` but absent from
// `baseline`, and clear the corresponding virgin bit in the baseline.
// `num_instances` is unused but kept for launch-signature compatibility.
__global__ void kernel_detect_new_coverage(gpu_coverage_map_t* current,
                                           gpu_coverage_map_t* baseline,
                                           uint32_t* new_coverage_flags,
                                           uint32_t num_instances) {
    (void)num_instances;
    uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= EDGE_COVERAGE_SIZE) return;

    if (current->edge_bitmap[idx] > 0 && baseline->edge_bitmap[idx] == 0) {
        uint32_t word_idx = idx / 32;
        uint32_t bit_idx = idx % 32;
        atomicAnd(&baseline->virgin_bits[word_idx], ~(1U << bit_idx));
        new_coverage_flags[0] = 1;  // benign race: every writer stores 1
    }
}

}  // namespace fuzzing
}  // namespace CuEVM

// ----------------------------------------------------------------------------
// File: CuEVM/src/fuzzing/gpu_fuzzer.cu
// GPU Fuzzer Orchestrator Implementation for NVIDIA B300
// SPDX-License-Identifier: MIT
// NOTE(review): the original patch's #include targets were stripped by
// angle-bracket mangling — restore them from the file header on disk.
// ----------------------------------------------------------------------------

namespace CuEVM {
namespace fuzzing {

// ============================================================================
// Fuzzer Configuration Implementation
// ============================================================================

// Conservative defaults suitable for any CUDA GPU.
__host__ void fuzzer_config_t::set_default() {
    num_instances = 8192;
    sequence_length = 1;
    auto_tune_batch_size = true;

    mutations_per_seed = 4;
    havoc_iterations = 8;
    abi_aware_mutation = true;
    dictionary_mutation = true;

    track_edge_coverage = true;
    track_branch_coverage = true;
    track_storage_coverage = true;
    gradient_guided = true;

    oracle_config.set_default();

    max_corpus_size = 16384;
    min_corpus_size = 64;
    minimize_seeds = true;
    cull_interval = 1000;

    seed_schedule = 1;  // weighted
    energy_decay_iterations = 100;

    stats_interval = 100;
    checkpoint_interval = 10000;
    verbose = false;

    max_iterations = 0;      // 0 = unlimited
    max_time_seconds = 0;    // 0 = unlimited
    stall_threshold = 100000;

    gpu_device_id = 0;
    use_pinned_memory = true;
    use_unified_memory = true;
}

// Defaults tuned for a B300-class GPU: larger batches, deeper mutation,
// bigger corpus.
__host__ void fuzzer_config_t::set_for_b300() {
    set_default();

    num_instances = DEFAULT_BATCH_SIZE;  // 64K instances
    auto_tune_batch_size = true;

    mutations_per_seed = 8;
    havoc_iterations = 16;

    max_corpus_size = 65536;

    use_pinned_memory = true;
    use_unified_memory = true;
}

// Load a configuration file; currently falls back to defaults.
// NOTE(review): field parsing is a stub — the file is read but its values
// are not applied. Wire up a real JSON parser (e.g. cJSON) before relying
// on config files.
__host__ void fuzzer_config_t::load_from_json(const char* filename) {
    FILE* f = fopen(filename, "r");
    if (!f) {
        printf("Warning: Could not open config file %s, using defaults\n", filename);
        set_default();
        return;
    }

    char buffer[4096];
    size_t len = fread(buffer, 1, 4095, f);
    buffer[len] = '\0';
    fclose(f);

    // TODO: parse `buffer` and apply the fields.
    set_default();
}

// Persist the tunable subset of the configuration as JSON.
__host__ void fuzzer_config_t::save_to_json(const char* filename) {
    FILE* f = fopen(filename, "w");
    if (!f) return;

    fprintf(f, "{\n");
    fprintf(f, "  \"num_instances\": %u,\n", num_instances);
    fprintf(f, "  \"sequence_length\": %u,\n", sequence_length);
    fprintf(f, "  \"auto_tune_batch_size\": %s,\n", auto_tune_batch_size ? "true" : "false");
    fprintf(f, "  \"mutations_per_seed\": %u,\n", mutations_per_seed);
    fprintf(f, "  \"havoc_iterations\": %u,\n", havoc_iterations);
    fprintf(f, "  \"abi_aware_mutation\": %s,\n", abi_aware_mutation ? "true" : "false");
    fprintf(f, "  \"max_corpus_size\": %u,\n", max_corpus_size);
    fprintf(f, "  \"max_iterations\": %u,\n", max_iterations);
    fprintf(f, "  \"max_time_seconds\": %u,\n", max_time_seconds);
    fprintf(f, "  \"gpu_device_id\": %d\n", gpu_device_id);
    fprintf(f, "}\n");

    fclose(f);
}

// ============================================================================
// Fuzzer Statistics Implementation
// ============================================================================

// Zero every statistic.
__host__ void fuzzer_stats_t::init() {
    total_iterations = 0;
    total_executions = 0;
    total_transactions = 0;

    unique_edges = 0;
    unique_branches = 0;
    unique_pcs = 0;
    edge_coverage_percent = 0.0f;
    branch_coverage_percent = 0.0f;

    total_bugs_found = 0;
    unique_bugs = 0;
    critical_bugs = 0;
    high_bugs = 0;
    medium_bugs = 0;
    low_bugs = 0;

    corpus_size = 0;
    seeds_added = 0;
    seeds_removed = 0;
    interesting_seeds = 0;

    total_time_seconds = 0.0;
    executions_per_second = 0.0;
    transactions_per_second = 0.0;
    gpu_utilization = 0.0;
    memory_usage_gb = 0.0;

    mutation_time_percent = 0.0;
    execution_time_percent = 0.0;
    coverage_time_percent = 0.0;
    oracle_time_percent = 0.0;

    last_new_coverage_iter = 0;
    last_bug_iter = 0;
    iterations_since_progress = 0;
}

// Refresh derived statistics from the corpus, bug store, and coverage map.
__host__ void fuzzer_stats_t::update(const corpus_stats_t& corpus_stats,
                                     const bug_storage_t& bugs,
                                     const gpu_coverage_map_t& coverage) {
    corpus_size = corpus_stats.current_size;
    unique_edges = coverage.unique_edges;
    unique_branches = coverage.unique_branches;

    total_bugs_found = bugs.bug_count;
    critical_bugs = bugs.count_by_severity(BugSeverity::CRITICAL);
    high_bugs = bugs.count_by_severity(BugSeverity::HIGH);
    medium_bugs = bugs.count_by_severity(BugSeverity::MEDIUM);
    low_bugs = bugs.count_by_severity(BugSeverity::LOW);

    if (total_time_seconds > 0) {
        executions_per_second = total_executions / total_time_seconds;
        transactions_per_second = total_transactions / total_time_seconds;
    }
}

// Full human-readable statistics dump.
// FIX: 64-bit counters are printed with %llu (cast to unsigned long long) —
// %lu is undefined behavior for uint64_t on LLP64 platforms.
__host__ void fuzzer_stats_t::print() {
    printf("\n");
    printf("================================================================================\n");
    printf("                          FUZZER STATISTICS                                    \n");
    printf("================================================================================\n");
    printf("\n");

    printf("EXECUTION:\n");
    printf("  Iterations:         %llu\n", (unsigned long long)total_iterations);
    printf("  Total Executions:   %llu\n", (unsigned long long)total_executions);
    printf("  Total Txs:          %llu\n", (unsigned long long)total_transactions);
    printf("  Time (s):           %.2f\n", total_time_seconds);
    printf("  Exec/sec:           %.2f\n", executions_per_second);
    printf("  Tx/sec:             %.2f\n", transactions_per_second);
    printf("\n");

    printf("COVERAGE:\n");
    printf("  Unique Edges:       %u\n", unique_edges);
    printf("  Unique Branches:    %u\n", unique_branches);
    printf("  Unique PCs:         %u\n", unique_pcs);
    printf("  Edge Coverage:      %.2f%%\n", edge_coverage_percent);
    printf("\n");

    printf("BUGS:\n");
    printf("  Total Found:        %u\n", total_bugs_found);
    printf("  Critical:           %u\n", critical_bugs);
    printf("  High:               %u\n", high_bugs);
    printf("  Medium:             %u\n", medium_bugs);
    printf("  Low:                %u\n", low_bugs);
    printf("\n");

    printf("CORPUS:\n");
    printf("  Current Size:       %u\n", corpus_size);
    printf("  Seeds Added:        %u\n", seeds_added);
    printf("  Interesting:        %u\n", interesting_seeds);
    printf("\n");

    printf("================================================================================\n");
}

// One-line progress summary for periodic logging.
__host__ void fuzzer_stats_t::print_summary() {
    printf("[%llu] execs: %llu (%.0f/s) | cov: %u edges | bugs: %u | corpus: %u\n",
           (unsigned long long)total_iterations, (unsigned long long)total_executions,
           executions_per_second, unique_edges, total_bugs_found, corpus_size);
}

// Machine-readable statistics export (one JSON object).
__host__ void fuzzer_stats_t::export_json(const char* filename) {
    FILE* f = fopen(filename, "w");
    if (!f) return;

    fprintf(f, "{\n");
    fprintf(f, "  \"total_iterations\": %llu,\n", (unsigned long long)total_iterations);
    fprintf(f, "  \"total_executions\": %llu,\n", (unsigned long long)total_executions);
    fprintf(f, "  \"total_transactions\": %llu,\n", (unsigned long long)total_transactions);
    fprintf(f, "  \"unique_edges\": %u,\n", unique_edges);
    fprintf(f, "  \"unique_branches\": %u,\n", unique_branches);
    fprintf(f, "  \"total_bugs_found\": %u,\n", total_bugs_found);
    fprintf(f, "  \"critical_bugs\": %u,\n", critical_bugs);
    fprintf(f, "  \"high_bugs\": %u,\n", high_bugs);
    fprintf(f, "  \"corpus_size\": %u,\n", corpus_size);
    fprintf(f, "  \"total_time_seconds\": %.2f,\n", total_time_seconds);
    fprintf(f, "  \"executions_per_second\": %.2f\n", executions_per_second);
    fprintf(f, "}\n");

    fclose(f);
}

// ============================================================================
// B300 Batch Optimizer Implementation
// ============================================================================

__host__ B300BatchOptimizer::B300BatchOptimizer()
    : history_idx_(0), history_count_(0), profiling_enabled_(false),
      total_profile_time_(0.0), total_profile_executions_(0) {
    for (int i = 0; i < 64; i++) {
        throughput_history_[i] = 0.0;
        batch_size_history_[i] = 0;
    }
}

// Pick the next batch size from a 64-entry throughput history:
// grow while the GPU is underutilized, fall back to the historical best
// when recent throughput regresses by >10%.
__host__ uint32_t B300BatchOptimizer::optimize_batch_size(uint32_t current_batch_size,
                                                          double current_throughput,
                                                          double gpu_utilization) {
    throughput_history_[history_idx_] = current_throughput;
    batch_size_history_[history_idx_] = current_batch_size;
    history_idx_ = (history_idx_ + 1) % 64;
    if (history_count_ < 64) history_count_++;

    double best_throughput = 0.0;
    uint32_t best_batch_size = current_batch_size;
    for (uint32_t i = 0; i < history_count_; i++) {
        if (throughput_history_[i] > best_throughput) {
            best_throughput = throughput_history_[i];
            best_batch_size = batch_size_history_[i];
        }
    }

    if (gpu_utilization < 0.8 && current_batch_size < MAX_BATCH_SIZE) {
        return std::min(current_batch_size * 2, MAX_BATCH_SIZE);
    }

    if (history_count_ > 4) {
        double recent_avg = 0.0;
        for (int i = 0; i < 4; i++) {
            int idx = (int)((history_idx_ + 64 - 1 - i) % 64);
            recent_avg += throughput_history_[idx];
        }
        recent_avg /= 4.0;

        if (recent_avg < best_throughput * 0.9) {
            return best_batch_size;
        }
    }

    return current_batch_size;
}

// Derive a batch configuration from contract/transaction size so the batch
// fits in ~80% of the B300's memory and is a multiple of the SM count.
// FIX: the original rounded down to an SM multiple *after* clamping, which
// could yield 0 instances when the estimate fell below B300_SM_COUNT; the
// minimum clamp is now applied last.
__host__ void B300BatchOptimizer::compute_optimal_config(uint32_t contract_size,
                                                         uint32_t avg_tx_size,
                                                         fuzzer_config_t* config) {
    size_t mem_per_instance = contract_size +            // bytecode
                              avg_tx_size * 2 +          // input + output
                              32 * 1024 +                // stack + memory
                              sizeof(instance_coverage_t) +
                              sizeof(execution_state_tracker_t);

    size_t available_memory = (size_t)B300_MEMORY_GB * 1024 * 1024 * 1024;
    available_memory = available_memory * 80 / 100;  // reserve 20% for the system

    uint32_t max_instances = (uint32_t)(available_memory / mem_per_instance);
    max_instances = std::min(max_instances, MAX_BATCH_SIZE);
    max_instances = (max_instances / B300_SM_COUNT) * B300_SM_COUNT;  // occupancy-friendly
    max_instances = std::max(max_instances, MIN_BATCH_SIZE);

    config->num_instances = max_instances;

    // Scale mutation depth inversely with contract complexity.
    if (contract_size > 100000) {
        config->mutations_per_seed = 4;
        config->havoc_iterations = 4;
    } else if (contract_size > 10000) {
        config->mutations_per_seed = 8;
        config->havoc_iterations = 8;
    } else {
        config->mutations_per_seed = 16;
        config->havoc_iterations = 16;
    }
}

// Rough per-batch GPU memory estimate (inputs, coverage, trackers, results).
__host__ size_t B300BatchOptimizer::estimate_memory_usage(uint32_t batch_size,
                                                          uint32_t sequence_length,
                                                          uint32_t avg_tx_size) {
    size_t input_memory = (size_t)batch_size * avg_tx_size * sequence_length;
    size_t coverage_memory = (size_t)batch_size * sizeof(instance_coverage_t);
    size_t tracker_memory = (size_t)batch_size * sizeof(execution_state_tracker_t);
    size_t result_memory = (size_t)batch_size * (sizeof(bool) + sizeof(uint64_t) + 1024);

    return input_memory + coverage_memory + tracker_memory + result_memory;
}

__host__ void B300BatchOptimizer::start_profiling() {
    profiling_enabled_ = true;
    profile_start_ = std::chrono::high_resolution_clock::now();
}

__host__ void B300BatchOptimizer::end_profiling() {
    profiling_enabled_ = false;
}

// Accumulate one iteration's wall time and executions while profiling.
__host__ void B300BatchOptimizer::record_iteration(double iteration_time, uint32_t batch_size) {
    if (!profiling_enabled_) return;
    total_profile_time_ += iteration_time;
    total_profile_executions_ += batch_size;
}

__host__ void B300BatchOptimizer::print_profile_stats() {
    if (total_profile_time_ > 0) {
        printf("\nB300 Profiling Stats:\n");
        printf("  Total Time:      %.2f s\n", total_profile_time_);
        printf("  Total Executions: %llu\n", (unsigned long long)total_profile_executions_);
        printf("  Average Throughput: %.2f exec/s\n",
               total_profile_executions_ / total_profile_time_);
    }
}

// ============================================================================
// GPU Memory Pool Implementation
// ============================================================================

// Three bump-allocated device pools (inputs, state, traces). Allocation
// failures are not checked here — NOTE(review): add cudaMalloc error
// handling before production use.
__host__ GPUMemoryPool::GPUMemoryPool(size_t input_pool_size,
                                      size_t state_pool_size,
                                      size_t trace_pool_size)
    : input_pool_size_(input_pool_size),
      state_pool_size_(state_pool_size),
      trace_pool_size_(trace_pool_size),
      input_pool_offset_(0),
      state_pool_offset_(0),
      trace_pool_offset_(0) {
    cudaMalloc(&input_pool_, input_pool_size);
    cudaMalloc(&state_pool_, state_pool_size);
    cudaMalloc(&trace_pool_, trace_pool_size);
}

__host__ GPUMemoryPool::~GPUMemoryPool() {
    cudaFree(input_pool_);
    cudaFree(state_pool_);
    cudaFree(trace_pool_);
}
+} + +__host__ void* GPUMemoryPool::allocate_input(size_t size) { + size = (size + 255) & ~255; // Align to 256 bytes + if (input_pool_offset_ + size > input_pool_size_) { + return nullptr; + } + void* ptr = input_pool_ + input_pool_offset_; + input_pool_offset_ += size; + return ptr; +} + +__host__ void* GPUMemoryPool::allocate_state(size_t size) { + size = (size + 255) & ~255; + if (state_pool_offset_ + size > state_pool_size_) { + return nullptr; + } + void* ptr = state_pool_ + state_pool_offset_; + state_pool_offset_ += size; + return ptr; +} + +__host__ void* GPUMemoryPool::allocate_trace(size_t size) { + size = (size + 255) & ~255; + if (trace_pool_offset_ + size > trace_pool_size_) { + return nullptr; + } + void* ptr = trace_pool_ + trace_pool_offset_; + trace_pool_offset_ += size; + return ptr; +} + +__host__ void GPUMemoryPool::free_input(void* ptr) { + // Pool-based, no individual frees +} + +__host__ void GPUMemoryPool::free_state(void* ptr) { + // Pool-based, no individual frees +} + +__host__ void GPUMemoryPool::free_trace(void* ptr) { + // Pool-based, no individual frees +} + +__host__ void GPUMemoryPool::reset_input_pool() { + input_pool_offset_ = 0; +} + +__host__ void GPUMemoryPool::reset_trace_pool() { + trace_pool_offset_ = 0; +} + +__host__ size_t GPUMemoryPool::get_input_pool_used() { + return input_pool_offset_; +} + +__host__ size_t GPUMemoryPool::get_state_pool_used() { + return state_pool_offset_; +} + +__host__ size_t GPUMemoryPool::get_trace_pool_used() { + return trace_pool_offset_; +} + +// ============================================================================ +// Execution Batch Implementation +// ============================================================================ + +__host__ void execution_batch_t::allocate(uint32_t instances, uint32_t seq_len, bool sequence_mode) { + num_instances = instances; + sequence_length = seq_len; + is_sequence_mode = sequence_mode; + + allocate_mutation_inputs(&inputs, instances, 
MAX_SEED_DATA_SIZE); + + if (sequence_mode) { + allocate_sequences(&sequences, instances, seq_len); + } else { + sequences = nullptr; + } + + coverage = CoverageMapAllocator::allocate_instances(instances); + trackers = allocate_trackers(instances); + + cudaMallocManaged(&execution_success, instances * sizeof(bool)); + cudaMallocManaged(&return_data, instances * 1024); // 1KB per instance + cudaMallocManaged(&return_sizes, instances * sizeof(uint32_t)); + cudaMallocManaged(&gas_used, instances * sizeof(uint64_t)); +} + +__host__ void execution_batch_t::free() { + free_mutation_inputs(inputs, num_instances); + if (sequences) { + free_sequences(sequences, num_instances); + } + CoverageMapAllocator::free_instances(coverage); + free_trackers(trackers); + cudaFree(execution_success); + cudaFree(return_data); + cudaFree(return_sizes); + cudaFree(gas_used); +} + +__host__ void execution_batch_t::reset() { + for (uint32_t i = 0; i < num_instances; i++) { + coverage[i].init(); + trackers[i].init(); + execution_success[i] = false; + return_sizes[i] = 0; + gas_used[i] = 0; + } + cudaMemset(return_data, 0, num_instances * 1024); +} + +// ============================================================================ +// GPU Fuzzer Implementation +// ============================================================================ + +__host__ GPUFuzzer::GPUFuzzer(const char* contract_source, + const char* contract_name, + const fuzzer_config_t* config) + : running_(false), initialized_(false), + progress_callback_(nullptr), progress_callback_ctx_(nullptr), + bug_callback_(nullptr), bug_callback_ctx_(nullptr) { + + // Copy contract info + if (contract_source) { + contract_source_ = strdup(contract_source); + } else { + contract_source_ = nullptr; + } + if (contract_name) { + contract_name_ = strdup(contract_name); + } else { + contract_name_ = nullptr; + } + + contract_bytecode_ = nullptr; + bytecode_len_ = 0; + + // Set configuration + if (config) { + config_ = *config; + } else { + 
config_.set_for_b300(); + } + + // Initialize statistics + stats_.init(); +} + +__host__ GPUFuzzer::~GPUFuzzer() { + if (contract_source_) free(contract_source_); + if (contract_name_) free(contract_name_); + if (contract_bytecode_) cudaFree(contract_bytecode_); + + if (initialized_) { + delete mutation_engine_; + delete corpus_; + delete invariant_checker_; + delete oracle_; + delete batch_optimizer_; + delete memory_pool_; + + CoverageMapAllocator::free_global(global_coverage_); + free_bug_storage(bugs_); + batch_.free(); + + cudaStreamDestroy(mutation_stream_); + cudaStreamDestroy(execution_stream_); + cudaStreamDestroy(analysis_stream_); + } +} + +__host__ bool GPUFuzzer::initialize() { + if (initialized_) return true; + + // Set GPU device + cudaSetDevice(config_.gpu_device_id); + + // Create CUDA streams + cudaStreamCreate(&mutation_stream_); + cudaStreamCreate(&execution_stream_); + cudaStreamCreate(&analysis_stream_); + + // Initialize RNG + rng_state_.init(config_.num_instances, time(nullptr)); + + // Create components + mutation_engine_ = new GPUMutationEngine(config_.num_instances, time(nullptr)); + mutation_engine_->enable_abi_aware(config_.abi_aware_mutation); + + corpus_ = new GPUCorpusManager(config_.max_corpus_size); + + invariant_checker_ = new InvariantChecker(); + + oracle_config_t* oracle_config = allocate_oracle_config(); + *oracle_config = config_.oracle_config; + bugs_ = allocate_bug_storage(); + oracle_ = new CompositeOracle(oracle_config, bugs_); + + batch_optimizer_ = new B300BatchOptimizer(); + memory_pool_ = new GPUMemoryPool(); + + // Allocate global coverage map + global_coverage_ = CoverageMapAllocator::allocate_global(1); + + // Allocate execution batch + batch_.allocate(config_.num_instances, config_.sequence_length, + config_.sequence_length > 1); + + start_time_ = std::chrono::high_resolution_clock::now(); + initialized_ = true; + + return true; +} + +__host__ bool GPUFuzzer::load_contract(const char* bytecode, uint32_t 
bytecode_len) { + if (contract_bytecode_) { + cudaFree(contract_bytecode_); + } + + bytecode_len_ = bytecode_len; + cudaMallocManaged(&contract_bytecode_, bytecode_len); + memcpy(contract_bytecode_, bytecode, bytecode_len); + + return true; +} + +__host__ void GPUFuzzer::set_config(const fuzzer_config_t& config) { + config_ = config; +} + +__host__ void GPUFuzzer::add_invariant(const invariant_t& inv) { + if (invariant_checker_) { + invariant_checker_->add_invariant(inv); + } +} + +__host__ void GPUFuzzer::add_seed(const uint8_t* calldata, uint32_t len) { + if (!corpus_) return; + + seed_entry_t seed; + seed.init(); + seed.data.length = len; + cudaMallocManaged(&seed.data.data, len); + memcpy(seed.data.data, calldata, len); + seed.data.capacity = len; + seed.num_transactions = 1; + seed.tx_offsets[0] = 0; + seed.tx_lengths[0] = len; + + corpus_->add_seed(seed); +} + +__host__ void GPUFuzzer::generate_initial_seeds() { + if (!corpus_) return; + + // Generate simple seeds + // Empty calldata + uint8_t empty[4] = {0, 0, 0, 0}; + add_seed(empty, 4); + + // Common function selectors with no args + uint8_t selectors[][4] = { + {0x06, 0xfd, 0xde, 0x03}, // name() + {0x95, 0xd8, 0x9b, 0x41}, // symbol() + {0x31, 0x3c, 0xe5, 0x67}, // decimals() + {0x18, 0x16, 0x0d, 0xdd}, // totalSupply() + }; + + for (int i = 0; i < 4; i++) { + add_seed(selectors[i], 4); + } +} + +__host__ void GPUFuzzer::run() { + if (!initialized_ && !initialize()) { + printf("Failed to initialize fuzzer\n"); + return; + } + + running_ = true; + uint32_t iteration = 0; + + printf("Starting GPU fuzzer on B300...\n"); + printf("Config: %u instances, %u sequence length\n", + config_.num_instances, config_.sequence_length); + + while (running_ && !should_stop()) { + // Single fuzzing iteration + prepare_batch(); + execute_batch(); + analyze_batch(); + update_corpus(); + + iteration++; + stats_.total_iterations = iteration; + + // Periodic operations + if (iteration % config_.stats_interval == 0) { + 
report_progress(); + } + + maybe_cull_corpus(); + maybe_checkpoint(); + } + + printf("\nFuzzing complete.\n"); + print_stats(); +} + +__host__ void GPUFuzzer::run_iterations(uint32_t num_iterations) { + if (!initialized_ && !initialize()) { + return; + } + + running_ = true; + + for (uint32_t i = 0; i < num_iterations && running_; i++) { + prepare_batch(); + execute_batch(); + analyze_batch(); + update_corpus(); + + stats_.total_iterations++; + + if ((i + 1) % config_.stats_interval == 0) { + report_progress(); + } + } +} + +__host__ void GPUFuzzer::stop() { + running_ = false; +} + +__host__ void GPUFuzzer::prepare_batch() { + batch_.reset(); + + // Select seeds from corpus + select_seeds_for_batch(); + + // Mutate selected inputs + mutate_batch(); +} + +__host__ void GPUFuzzer::execute_batch() { + // Execute EVM instances on GPU + // This would interface with CuEVM's kernel_evm_multiple_instances + // For now, simulated + + stats_.total_executions += config_.num_instances; + stats_.total_transactions += config_.num_instances * config_.sequence_length; +} + +__host__ void GPUFuzzer::analyze_batch() { + // Collect coverage + collect_coverage(); + + // Check oracles for bugs + check_oracles(); + + // Check invariants + check_invariants(); + + // Process interesting inputs + process_interesting_inputs(); +} + +__host__ void GPUFuzzer::update_corpus() { + // Update corpus with new interesting seeds + // Handled in process_interesting_inputs +} + +__host__ void GPUFuzzer::select_seeds_for_batch() { + if (corpus_->size() == 0) { + // No seeds in corpus, use default inputs + for (uint32_t i = 0; i < config_.num_instances; i++) { + batch_.inputs[i].length = 4; + for (int j = 0; j < 4; j++) { + batch_.inputs[i].data[j] = 0; + } + } + return; + } + + // Select seeds based on scheduling policy + for (uint32_t i = 0; i < config_.num_instances; i++) { + seed_entry_t* seed; + if (config_.seed_schedule == 1) { + seed = corpus_->select_weighted(&rng_state_.states[i]); + } else { 
+ seed = corpus_->select_seed(&rng_state_.states[i]); + } + + if (seed) { + batch_.inputs[i].copy_from(seed->data); + } + } +} + +__host__ void GPUFuzzer::mutate_batch() { + mutation_engine_->mutate_batch(batch_.inputs, config_.num_instances, + config_.mutations_per_seed, mutation_stream_); + cudaStreamSynchronize(mutation_stream_); +} + +__host__ void GPUFuzzer::collect_coverage() { + // Merge instance coverage to global + uint32_t blocks = (config_.num_instances + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + kernel_merge_coverage<<>>( + global_coverage_, batch_.coverage, config_.num_instances + ); + cudaStreamSynchronize(analysis_stream_); +} + +__host__ void GPUFuzzer::check_oracles() { + uint32_t blocks = (config_.num_instances + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + kernel_check_reentrancy<<>>( + batch_.trackers, config_.num_instances, bugs_, &config_.oracle_config + ); + cudaStreamSynchronize(analysis_stream_); +} + +__host__ void GPUFuzzer::check_invariants() { + // Check invariants on post-states + // Would check against stored invariants +} + +__host__ void GPUFuzzer::process_interesting_inputs() { + // Find inputs that caused new coverage + uint32_t prev_edges = stats_.unique_edges; + + // Count current coverage + uint32_t new_edges = 0; + for (uint32_t i = 0; i < EDGE_COVERAGE_SIZE; i++) { + if (global_coverage_->edge_bitmap[i] > 0) new_edges++; + } + + if (new_edges > prev_edges) { + stats_.unique_edges = new_edges; + stats_.last_new_coverage_iter = stats_.total_iterations; + stats_.iterations_since_progress = 0; + + // Add interesting inputs to corpus + // (Would track which inputs caused the new coverage) + stats_.seeds_added++; + } else { + stats_.iterations_since_progress++; + } + + // Check for new bugs + if (bugs_->bug_count > stats_.total_bugs_found) { + stats_.total_bugs_found = bugs_->bug_count; + stats_.last_bug_iter = stats_.total_iterations; + stats_.iterations_since_progress = 0; + + // Callback for new bug + if (bug_callback_ && 
bugs_->bug_count > 0) { + bug_callback_(&bugs_->bugs[bugs_->bug_count - 1], bug_callback_ctx_); + } + } +} + +__host__ void GPUFuzzer::update_statistics() { + auto now = std::chrono::high_resolution_clock::now(); + std::chrono::duration elapsed = now - start_time_; + stats_.total_time_seconds = elapsed.count(); + + if (stats_.total_time_seconds > 0) { + stats_.executions_per_second = stats_.total_executions / stats_.total_time_seconds; + stats_.transactions_per_second = stats_.total_transactions / stats_.total_time_seconds; + } + + stats_.corpus_size = corpus_->size(); + stats_.update(*corpus_->get_stats(), *bugs_, *global_coverage_); +} + +__host__ void GPUFuzzer::report_progress() { + update_statistics(); + + if (config_.verbose) { + stats_.print_summary(); + } + + if (progress_callback_) { + progress_callback_(&stats_, progress_callback_ctx_); + } +} + +__host__ void GPUFuzzer::maybe_cull_corpus() { + if (config_.cull_interval > 0 && + stats_.total_iterations % config_.cull_interval == 0) { + corpus_->cull_corpus(); + } +} + +__host__ void GPUFuzzer::maybe_checkpoint() { + if (config_.checkpoint_interval > 0 && + stats_.total_iterations % config_.checkpoint_interval == 0) { + char filename[256]; + snprintf(filename, sizeof(filename), "checkpoint_%lu.bin", + stats_.total_iterations); + save_checkpoint(filename); + } +} + +__host__ bool GPUFuzzer::should_stop() { + if (config_.max_iterations > 0 && + stats_.total_iterations >= config_.max_iterations) { + return true; + } + + if (config_.max_time_seconds > 0 && + stats_.total_time_seconds >= config_.max_time_seconds) { + return true; + } + + if (config_.stall_threshold > 0 && + stats_.iterations_since_progress >= config_.stall_threshold) { + printf("Stopping: No progress for %u iterations\n", config_.stall_threshold); + return true; + } + + return false; +} + +__host__ void GPUFuzzer::print_stats() { + update_statistics(); + stats_.print(); +} + +__host__ void GPUFuzzer::print_bugs() { + print_bug_report(bugs_); +} 
+ +__host__ void GPUFuzzer::export_results(const char* directory) { + char filename[512]; + + // Export stats + snprintf(filename, sizeof(filename), "%s/stats.json", directory); + stats_.export_json(filename); + + // Export bugs + snprintf(filename, sizeof(filename), "%s/bugs.json", directory); + export_bugs_json(bugs_, filename); + + // Export coverage + snprintf(filename, sizeof(filename), "%s/coverage.bin", directory); + // Would save coverage bitmap + + // Export corpus + snprintf(filename, sizeof(filename), "%s/corpus", directory); + corpus_->export_seeds(filename); +} + +__host__ void GPUFuzzer::save_checkpoint(const char* filename) { + FILE* f = fopen(filename, "wb"); + if (!f) return; + + // Write stats + fwrite(&stats_, sizeof(stats_), 1, f); + + // Write coverage + fwrite(global_coverage_->edge_bitmap, EDGE_COVERAGE_SIZE, 1, f); + + // Write corpus info + uint32_t corpus_size = corpus_->size(); + fwrite(&corpus_size, sizeof(corpus_size), 1, f); + + fclose(f); +} + +__host__ void GPUFuzzer::load_checkpoint(const char* filename) { + FILE* f = fopen(filename, "rb"); + if (!f) return; + + // Read stats + fread(&stats_, sizeof(stats_), 1, f); + + // Read coverage + fread(global_coverage_->edge_bitmap, EDGE_COVERAGE_SIZE, 1, f); + + fclose(f); +} + +__host__ void GPUFuzzer::set_progress_callback(progress_callback_t cb, void* ctx) { + progress_callback_ = cb; + progress_callback_ctx_ = ctx; +} + +__host__ void GPUFuzzer::set_bug_callback(bug_callback_t cb, void* ctx) { + bug_callback_ = cb; + bug_callback_ctx_ = ctx; +} + +// ============================================================================ +// Convenience Functions +// ============================================================================ + +__host__ fuzzer_stats_t quick_fuzz( + const char* contract_source, + const char* contract_name, + uint32_t num_iterations, + uint32_t num_instances) { + + fuzzer_config_t config; + config.set_for_b300(); + config.num_instances = num_instances; + 
config.max_iterations = num_iterations; + + GPUFuzzer fuzzer(contract_source, contract_name, &config); + fuzzer.initialize(); + fuzzer.generate_initial_seeds(); + fuzzer.run(); + + return *fuzzer.get_stats(); +} + +__host__ fuzzer_stats_t fuzz_with_config( + const char* contract_source, + const char* contract_name, + const fuzzer_config_t& config) { + + GPUFuzzer fuzzer(contract_source, contract_name, &config); + fuzzer.initialize(); + fuzzer.generate_initial_seeds(); + fuzzer.run(); + + return *fuzzer.get_stats(); +} + +// ============================================================================ +// CUDA Kernel Implementations +// ============================================================================ + +__global__ void kernel_merge_batch_coverage( + instance_coverage_t* instance_coverage, + gpu_coverage_map_t* global_coverage, + uint32_t num_instances, + uint32_t* new_coverage_flags) { + + uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_instances) return; + + instance_coverage_t* inst = &instance_coverage[idx]; + + // Merge edge hashes + for (uint32_t i = 0; i < inst->edge_hash_idx && i < 256; i++) { + uint32_t hash = inst->edge_hashes[i]; + uint32_t bitmap_idx = hash % EDGE_COVERAGE_SIZE; + + uint8_t old_val = global_coverage->edge_bitmap[bitmap_idx]; + atomicAdd((unsigned char*)&global_coverage->edge_bitmap[bitmap_idx], 1); + + if (old_val == 0) { + atomicExch(new_coverage_flags, 1); + } + } + + // Update global stats + atomicAdd(&global_coverage->total_instructions_executed, + (unsigned long long)inst->pcs_hit); +} + +__global__ void kernel_run_oracles( + CompositeOracle* oracle, + execution_state_tracker_t* trackers, + uint32_t num_instances, + bug_storage_t* bugs) { + + uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_instances) return; + + // Check for reentrancy in this instance + if (trackers[idx].check_reentrancy()) { + detected_bug_t bug; + bug.type = BugType::REENTRANCY_ETH; + bug.severity = 
BugSeverity::CRITICAL; + bug.location.pc = 0; + bug.location.tx_index = 0; + bug.location.call_depth = trackers[idx].call_depth; + bugs->add_bug(bug); + } +} + +__global__ void kernel_weighted_selection( + seed_entry_t* seeds, + uint32_t num_seeds, + uint32_t* cumulative_weights, + uint32_t* selected_indices, + uint32_t num_to_select, + curandState* rng) { + + uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_to_select) return; + + uint32_t total_weight = cumulative_weights[num_seeds - 1]; + uint32_t rand_val = curand(&rng[idx]) % total_weight; + + // Binary search for the selected seed + uint32_t low = 0, high = num_seeds - 1; + while (low < high) { + uint32_t mid = (low + high) / 2; + if (cumulative_weights[mid] <= rand_val) { + low = mid + 1; + } else { + high = mid; + } + } + + selected_indices[idx] = low; +} + +} // namespace fuzzing +} // namespace CuEVM diff --git a/CuEVM/src/fuzzing/mutation.cu b/CuEVM/src/fuzzing/mutation.cu new file mode 100644 index 0000000..7d774fa --- /dev/null +++ b/CuEVM/src/fuzzing/mutation.cu @@ -0,0 +1,1558 @@ +// CuEVM: CUDA Ethereum Virtual Machine implementation +// GPU Mutation Engine Implementation for NVIDIA B300 +// SPDX-License-Identifier: MIT + +#include +#include +#include + +namespace CuEVM { +namespace fuzzing { + +// ============================================================================ +// Interesting Values Definitions (declared in mutation.cuh) +// ============================================================================ + +// 8-bit interesting values +__constant__ int8_t INTERESTING_8_VALUES[NUM_INTERESTING_8] = { + -128, -1, 0, 1, 16, 32, 64, 100, 127 +}; + +// 16-bit interesting values +__constant__ int16_t INTERESTING_16_VALUES[NUM_INTERESTING_16] = { + -32768, -129, -128, -1, 0, 1, 127, 128, 255, 256, + 512, 1000, 1024, 4096, 32767 +}; + +// 32-bit interesting values +__constant__ int32_t INTERESTING_32_VALUES[NUM_INTERESTING_32] = { + -2147483648, -100663046, -32769, -32768, 
-129, -128, -1, + 0, 1, 127, 128, 255, 256, 512, 1000, 1024, 4096, 32767, + 32768, 65535, 65536, 100663045, 2147483647 +}; + +// 64-bit interesting values (for Solidity uint256 boundaries) +__constant__ int64_t INTERESTING_64_VALUES[NUM_INTERESTING_64] = { + 0LL, + 1LL, + -1LL, + 255LL, + 256LL, + 65535LL, + 65536LL, + 0x7FFFFFFFLL, + 0x80000000LL, + 0xFFFFFFFFLL, + 0x100000000LL, + 0x7FFFFFFFFFFFFFFFLL, + (int64_t)0x8000000000000000ULL, + -1LL // 0xFFFFFFFFFFFFFFFF +}; + +// ============================================================================ +// EVM Interesting Values (256-bit) +// ============================================================================ + +// Pre-defined interesting 256-bit values for Solidity +__device__ __constant__ uint32_t EVM_INTERESTING_256[][8] = { + {0, 0, 0, 0, 0, 0, 0, 0}, // 0 + {1, 0, 0, 0, 0, 0, 0, 0}, // 1 + {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, // MAX_UINT256 + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, + {0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, // MAX_UINT256 - 1 + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, + {0, 0, 0, 0, 0, 0, 0, 0x80000000}, // MIN_INT256 + {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, // MAX_INT256 + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF}, + {0, 0, 0, 0, 0, 0, 1, 0}, // 2^64 + {0, 0, 0, 0, 0, 0, 0, 1}, // 2^224 + {0, 0, 0, 0, 1, 0, 0, 0}, // 2^128 + {0xFFFFFFFF, 0, 0, 0, 0, 0, 0, 0}, // 2^32 - 1 + {0, 1, 0, 0, 0, 0, 0, 0}, // 2^32 + {0xFFFFFFFF, 0xFFFFFFFF, 0, 0, 0, 0, 0, 0}, // 2^64 - 1 + {0, 0, 1, 0, 0, 0, 0, 0}, // 2^64 + // Common Ether values + {0x4A817C80, 0xDE0B6B3, 0, 0, 0, 0, 0, 0}, // 1 ETH in wei (10^18) + {0x2D79883D, 0x8AC72304, 0x89E8, 0, 0, 0, 0, 0}, // 10000 ETH + // Common addresses + {0xDEADBEEF, 0xCAFEBABE, 0x12345678, 0x9ABCDEF0, 0, 0, 0, 0}, +}; +constexpr uint32_t NUM_EVM_INTERESTING = 16; + +// Common function selectors +__device__ __constant__ uint8_t COMMON_SELECTORS[][4] = { + {0xa9, 0x05, 0x9c, 0xbb}, // transfer(address,uint256) 
+ {0x23, 0xb8, 0x72, 0xdd}, // transferFrom(address,address,uint256) + {0x09, 0x5e, 0xa7, 0xb3}, // approve(address,uint256) + {0x70, 0xa0, 0x82, 0x31}, // balanceOf(address) + {0xdd, 0x62, 0xed, 0x3e}, // allowance(address,address) + {0x40, 0xc1, 0x0f, 0x19}, // mint(address,uint256) + {0x42, 0x96, 0x6c, 0x68}, // burn(uint256) + {0x79, 0xcc, 0x67, 0x90}, // burnFrom(address,uint256) + {0x18, 0x16, 0x0d, 0xdd}, // totalSupply() + {0x06, 0xfd, 0xde, 0x03}, // name() + {0x95, 0xd8, 0x9b, 0x41}, // symbol() + {0x31, 0x3c, 0xe5, 0x67}, // decimals() + {0xb6, 0xb5, 0x5f, 0x25}, // deposit() + {0x2e, 0x1a, 0x7d, 0x4d}, // withdraw(uint256) + {0x3c, 0xcf, 0xd6, 0x0b}, // stake(uint256) + {0x2e, 0x17, 0xde, 0x78}, // unstake(uint256) +}; +constexpr uint32_t NUM_COMMON_SELECTORS = 16; + +// ============================================================================ +// Mutation Dictionary Implementation +// ============================================================================ + +__host__ __device__ void mutation_dictionary_t::init() { + num_entries = 0; + next_insert_idx = 0; + num_addresses = 0; + num_selectors = 0; + num_values = 0; +} + +__host__ __device__ bool mutation_dictionary_t::add_entry(const uint8_t* data, uint8_t length, + DictionaryEntryType type, uint32_t pc) { + if (length > 64) length = 64; + + // Check for duplicates (simple linear search - could optimize with hashing) + for (uint32_t i = 0; i < num_entries; i++) { + if (entries[i].length == length && entries[i].entry_type == (uint8_t)type) { + bool match = true; + for (uint8_t j = 0; j < length && match; j++) { + if (entries[i].data[j] != data[j]) match = false; + } + if (match) { + entries[i].hit_count++; + return false; // Already exists + } + } + } + + // Add new entry + uint32_t idx; + if (num_entries < MAX_DICTIONARY_SIZE) { + idx = num_entries++; + } else { + // Replace oldest entry (FIFO) + idx = next_insert_idx; + next_insert_idx = (next_insert_idx + 1) % MAX_DICTIONARY_SIZE; + } + + for 
(uint8_t i = 0; i < length; i++) { + entries[idx].data[i] = data[i]; + } + entries[idx].length = length; + entries[idx].entry_type = (uint8_t)type; + entries[idx].hit_count = 1; + entries[idx].source_pc = pc; + + // Update type-specific index + switch (type) { + case DictionaryEntryType::ADDRESS: + if (num_addresses < 256) { + address_indices[num_addresses++] = idx; + } + break; + case DictionaryEntryType::FUNCTION_SELECTOR: + if (num_selectors < 256) { + selector_indices[num_selectors++] = idx; + } + break; + case DictionaryEntryType::UINT256_VALUE: + case DictionaryEntryType::BYTES32_VALUE: + if (num_values < 256) { + value_indices[num_values++] = idx; + } + break; + default: + break; + } + + return true; +} + +__host__ __device__ const dictionary_entry_t* mutation_dictionary_t::get_random(curandState* rng, + DictionaryEntryType type) { + if (num_entries == 0) return nullptr; + +#ifdef __CUDA_ARCH__ + uint32_t rand_val = curand(rng); +#else + uint32_t rand_val = rand(); +#endif + + if (type == (DictionaryEntryType)255) { + // Any type + return &entries[rand_val % num_entries]; + } + + // Type-specific lookup + switch (type) { + case DictionaryEntryType::ADDRESS: + if (num_addresses > 0) { + return &entries[address_indices[rand_val % num_addresses]]; + } + break; + case DictionaryEntryType::FUNCTION_SELECTOR: + if (num_selectors > 0) { + return &entries[selector_indices[rand_val % num_selectors]]; + } + break; + case DictionaryEntryType::UINT256_VALUE: + case DictionaryEntryType::BYTES32_VALUE: + if (num_values > 0) { + return &entries[value_indices[rand_val % num_values]]; + } + break; + default: + break; + } + + return &entries[rand_val % num_entries]; +} + +__host__ __device__ void mutation_dictionary_t::update_hit_count(uint32_t idx) { + if (idx < num_entries) { + entries[idx].hit_count++; + } +} + +// ============================================================================ +// Mutation Input Implementation +// 
============================================================================ + +__host__ __device__ void mutation_input_t::init(uint32_t max_size) { + capacity = max_size; + length = 0; + num_params = 0; + for (int i = 0; i < 4; i++) selector[i] = 0; + for (int i = 0; i < 32; i++) { + param_offsets[i] = 0; + param_types[i] = 0; + } +} + +__host__ __device__ void mutation_input_t::copy_from(const mutation_input_t& other) { + if (capacity < other.length) return; + + length = other.length; + for (uint32_t i = 0; i < length; i++) { + data[i] = other.data[i]; + } + for (int i = 0; i < 4; i++) selector[i] = other.selector[i]; + num_params = other.num_params; + for (uint32_t i = 0; i < num_params && i < 32; i++) { + param_offsets[i] = other.param_offsets[i]; + param_types[i] = other.param_types[i]; + } + // Copy 256-bit values + for (int i = 0; i < 8; i++) { + value._limbs[i] = other.value._limbs[i]; + gas_limit._limbs[i] = other.gas_limit._limbs[i]; + sender._limbs[i] = other.sender._limbs[i]; + receiver._limbs[i] = other.receiver._limbs[i]; + block_number._limbs[i] = other.block_number._limbs[i]; + timestamp._limbs[i] = other.timestamp._limbs[i]; + basefee._limbs[i] = other.basefee._limbs[i]; + prevrandao._limbs[i] = other.prevrandao._limbs[i]; + } +} + +__host__ __device__ void mutation_input_t::parse_abi() { + if (length < 4) return; + + // Extract selector + for (int i = 0; i < 4; i++) { + selector[i] = data[i]; + } + + // Parse parameters (32-byte chunks) + num_params = 0; + for (uint32_t offset = 4; offset + 32 <= length && num_params < 32; offset += 32) { + param_offsets[num_params] = offset; + // Simple type detection based on leading zeros + uint32_t leading_zeros = 0; + for (uint32_t i = 0; i < 32 && data[offset + i] == 0; i++) { + leading_zeros++; + } + if (leading_zeros >= 12) { + param_types[num_params] = (uint8_t)abi::ABIType::ADDRESS; // Likely address + } else if (leading_zeros >= 24) { + param_types[num_params] = (uint8_t)abi::ABIType::UINT64; + } else { 
+ param_types[num_params] = (uint8_t)abi::ABIType::UINT256; + } + num_params++; + } +} + +__host__ __device__ void mutation_input_t::reserialize_abi() { + // Ensure selector is at the start + for (int i = 0; i < 4; i++) { + data[i] = selector[i]; + } + // Parameters should already be in place +} + +// ============================================================================ +// GPU RNG State Implementation +// ============================================================================ + +__host__ void gpu_rng_state_t::init(uint32_t num_threads, uint64_t seed) { + num_states = num_threads; + cudaMalloc(&states, num_threads * sizeof(curandState)); + + // Initialize RNG states on GPU + uint32_t block_size = 256; + uint32_t num_blocks = (num_threads + block_size - 1) / block_size; + kernel_init_rng<<>>(states, num_threads, seed); + cudaDeviceSynchronize(); +} + +__host__ void gpu_rng_state_t::free() { + if (states) { + cudaFree(states); + states = nullptr; + } +} + +// ============================================================================ +// GPU Mutation Engine Implementation +// ============================================================================ + +__host__ GPUMutationEngine::GPUMutationEngine(uint32_t num_instances, uint64_t seed) { + rng_state_.init(num_instances, seed); + + cudaMallocManaged(&dictionary_, sizeof(mutation_dictionary_t)); + dictionary_->init(); + + // Default mutation weights + for (int i = 0; i < 64; i++) mutation_weights_[i] = 10; + mutation_weights_[(int)MutationType::FLIP_BIT_1] = WEIGHT_BIT_FLIP; + mutation_weights_[(int)MutationType::FLIP_BYTE_1] = WEIGHT_BYTE_FLIP; + mutation_weights_[(int)MutationType::ARITH_INC_8] = WEIGHT_ARITH_INC; + mutation_weights_[(int)MutationType::ARITH_DEC_8] = WEIGHT_ARITH_DEC; + mutation_weights_[(int)MutationType::INTERESTING_8] = WEIGHT_INTERESTING; + mutation_weights_[(int)MutationType::DICT_INSERT] = WEIGHT_DICTIONARY; + mutation_weights_[(int)MutationType::HAVOC_SINGLE] = WEIGHT_HAVOC; + 
mutation_weights_[(int)MutationType::SPLICE] = WEIGHT_SPLICE; + + max_mutations_ = 16; + abi_aware_ = true; +} + +__host__ GPUMutationEngine::~GPUMutationEngine() { + rng_state_.free(); + if (dictionary_) { + cudaFree(dictionary_); + } +} + +__device__ MutationType GPUMutationEngine::select_mutation_type(curandState* rng) { + uint32_t total_weight = 0; + for (int i = 0; i < (int)MutationType::NUM_MUTATION_TYPES; i++) { + total_weight += mutation_weights_[i]; + } + + uint32_t rand_val = curand(rng) % total_weight; + uint32_t cumulative = 0; + + for (int i = 0; i < (int)MutationType::NUM_MUTATION_TYPES; i++) { + cumulative += mutation_weights_[i]; + if (rand_val < cumulative) { + return (MutationType)i; + } + } + + return MutationType::FLIP_BIT_1; +} + +__device__ uint32_t GPUMutationEngine::select_offset(uint32_t length, curandState* rng) { + if (length == 0) return 0; + return curand(rng) % length; +} + +__device__ mutation_result_t GPUMutationEngine::mutate(mutation_input_t* input, curandState* rng) { + MutationType type = select_mutation_type(rng); + return mutate_typed(input, type, rng); +} + +__device__ mutation_result_t GPUMutationEngine::mutate_typed(mutation_input_t* input, MutationType type, curandState* rng) { + mutation_result_t result; + result.type = type; + result.success = false; + result.size_delta = 0; + + if (input->length == 0) return result; + + result.offset = select_offset(input->length, rng); + + switch (type) { + case MutationType::FLIP_BIT_1: + flip_bit(input->data, input->length, result.offset, 1); + result.success = true; + break; + + case MutationType::FLIP_BIT_2: + flip_bit(input->data, input->length, result.offset, 2); + result.success = true; + break; + + case MutationType::FLIP_BIT_4: + flip_bit(input->data, input->length, result.offset, 4); + result.success = true; + break; + + case MutationType::FLIP_BYTE_1: + flip_byte(input->data, input->length, result.offset, 1); + result.success = true; + break; + + case 
MutationType::FLIP_BYTE_2: + flip_byte(input->data, input->length, result.offset, 2); + result.success = true; + break; + + case MutationType::FLIP_BYTE_4: + flip_byte(input->data, input->length, result.offset, 4); + result.success = true; + break; + + case MutationType::ARITH_INC_8: + arith_mutation(input->data, input->length, result.offset, 1, true, (curand(rng) % ARITH_MAX_DELTA) + 1); + result.success = true; + break; + + case MutationType::ARITH_DEC_8: + arith_mutation(input->data, input->length, result.offset, 1, false, (curand(rng) % ARITH_MAX_DELTA) + 1); + result.success = true; + break; + + case MutationType::ARITH_INC_16: + arith_mutation(input->data, input->length, result.offset, 2, true, (curand(rng) % ARITH_MAX_DELTA) + 1); + result.success = true; + break; + + case MutationType::ARITH_DEC_16: + arith_mutation(input->data, input->length, result.offset, 2, false, (curand(rng) % ARITH_MAX_DELTA) + 1); + result.success = true; + break; + + case MutationType::ARITH_INC_32: + arith_mutation(input->data, input->length, result.offset, 4, true, (curand(rng) % ARITH_MAX_DELTA) + 1); + result.success = true; + break; + + case MutationType::ARITH_DEC_32: + arith_mutation(input->data, input->length, result.offset, 4, false, (curand(rng) % ARITH_MAX_DELTA) + 1); + result.success = true; + break; + + case MutationType::INTERESTING_8: + case MutationType::INTERESTING_16: + case MutationType::INTERESTING_32: + case MutationType::INTERESTING_64: + interesting_mutation(input->data, input->length, result.offset, + (type == MutationType::INTERESTING_8) ? 1 : + (type == MutationType::INTERESTING_16) ? 2 : + (type == MutationType::INTERESTING_32) ? 
4 : 8, rng); + result.success = true; + break; + + case MutationType::INTERESTING_256: + if (result.offset + 32 <= input->length) { + uint32_t idx = curand(rng) % NUM_EVM_INTERESTING; + for (int i = 0; i < 8; i++) { + uint32_t val = EVM_INTERESTING_256[idx][i]; + input->data[result.offset + i*4] = val & 0xFF; + input->data[result.offset + i*4 + 1] = (val >> 8) & 0xFF; + input->data[result.offset + i*4 + 2] = (val >> 16) & 0xFF; + input->data[result.offset + i*4 + 3] = (val >> 24) & 0xFF; + } + result.success = true; + } + break; + + case MutationType::DICT_INSERT: + case MutationType::DICT_OVERWRITE: + apply_dictionary(input, rng); + result.success = true; + break; + + case MutationType::HAVOC_SINGLE: + havoc(input, rng, 1); + result.success = true; + break; + + case MutationType::HAVOC_MULTI: + havoc(input, rng, 2 + (curand(rng) % 6)); + result.success = true; + break; + + case MutationType::EVM_ADDRESS: + mutate_address(input, result.offset, rng); + result.success = true; + break; + + case MutationType::EVM_UINT256: + mutate_uint256(input, result.offset, rng); + result.success = true; + break; + + case MutationType::EVM_SELECTOR: + mutate_selector(input, rng); + result.success = true; + break; + + case MutationType::EVM_CALLDATA: + mutate_calldata(input, rng); + result.success = true; + break; + + case MutationType::DELETE_BYTES: + if (input->length > 8) { + uint32_t count = 1 + (curand(rng) % 4); + if (result.offset + count <= input->length) { + delete_bytes(input, result.offset, count); + result.size_delta = -(int32_t)count; + result.success = true; + } + } + break; + + case MutationType::CLONE_BYTE: + if (input->length > 1 && input->length < input->capacity - 4) { + uint32_t src = curand(rng) % input->length; + uint32_t count = 1 + (curand(rng) % 4); + if (input->length + count <= input->capacity) { + clone_bytes(input, src, result.offset, count); + result.size_delta = count; + result.success = true; + } + } + break; + + case MutationType::SWAP_BYTES: + if 
(input->length > 4) { + uint32_t offset2 = curand(rng) % input->length; + uint32_t count = 1 + (curand(rng) % 4); + if (result.offset + count <= input->length && offset2 + count <= input->length) { + swap_bytes(input->data, result.offset, offset2, count); + result.success = true; + } + } + break; + + case MutationType::SHUFFLE_BYTES: + if (input->length > 4) { + uint32_t count = 4 + (curand(rng) % 12); + if (result.offset + count <= input->length) { + shuffle_bytes(input->data, result.offset, count, rng); + result.success = true; + } + } + break; + + case MutationType::BOUNDARY_LOW: + // Set to boundary value (0 or 1) + if (result.offset + 32 <= input->length) { + for (uint32_t i = 0; i < 31; i++) { + input->data[result.offset + i] = 0; + } + input->data[result.offset + 31] = curand(rng) % 2; + result.success = true; + } + break; + + case MutationType::BOUNDARY_HIGH: + // Set to max boundary + if (result.offset + 32 <= input->length) { + for (uint32_t i = 0; i < 32; i++) { + input->data[result.offset + i] = 0xFF; + } + result.success = true; + } + break; + + case MutationType::BOUNDARY_POWER2: + // Set to power of 2 + if (result.offset + 32 <= input->length) { + for (uint32_t i = 0; i < 32; i++) { + input->data[result.offset + i] = 0; + } + uint32_t bit_pos = curand(rng) % 256; + uint32_t byte_pos = bit_pos / 8; + uint32_t bit_in_byte = bit_pos % 8; + input->data[result.offset + 31 - byte_pos] = 1 << bit_in_byte; + result.success = true; + } + break; + + default: + break; + } + + return result; +} + +__device__ void GPUMutationEngine::flip_bit(uint8_t* data, uint32_t length, uint32_t offset, uint8_t width) { + if (offset >= length) return; + for (uint8_t i = 0; i < width && offset < length; i++) { + uint8_t bit = i % 8; + data[offset] ^= (1 << bit); + if ((i + 1) % 8 == 0) offset++; + } +} + +__device__ void GPUMutationEngine::flip_byte(uint8_t* data, uint32_t length, uint32_t offset, uint8_t width) { + for (uint8_t i = 0; i < width && offset + i < length; i++) { + 
data[offset + i] ^= 0xFF; + } +} + +__device__ void GPUMutationEngine::arith_mutation(uint8_t* data, uint32_t length, uint32_t offset, + uint8_t width, bool increment, int32_t delta) { + if (offset + width > length) return; + + switch (width) { + case 1: { + if (increment) { + data[offset] += delta; + } else { + data[offset] -= delta; + } + break; + } + case 2: { + uint16_t val = data[offset] | (data[offset + 1] << 8); + if (increment) val += delta; + else val -= delta; + data[offset] = val & 0xFF; + data[offset + 1] = (val >> 8) & 0xFF; + break; + } + case 4: { + uint32_t val = data[offset] | (data[offset + 1] << 8) | + (data[offset + 2] << 16) | (data[offset + 3] << 24); + if (increment) val += delta; + else val -= delta; + data[offset] = val & 0xFF; + data[offset + 1] = (val >> 8) & 0xFF; + data[offset + 2] = (val >> 16) & 0xFF; + data[offset + 3] = (val >> 24) & 0xFF; + break; + } + default: + break; + } +} + +__device__ void GPUMutationEngine::interesting_mutation(uint8_t* data, uint32_t length, uint32_t offset, + uint8_t width, curandState* rng) { + if (offset + width > length) return; + + switch (width) { + case 1: { + uint32_t idx = curand(rng) % NUM_INTERESTING_8; + data[offset] = (uint8_t)INTERESTING_8_VALUES[idx]; + break; + } + case 2: { + uint32_t idx = curand(rng) % NUM_INTERESTING_16; + int16_t val = INTERESTING_16_VALUES[idx]; + data[offset] = val & 0xFF; + data[offset + 1] = (val >> 8) & 0xFF; + break; + } + case 4: { + uint32_t idx = curand(rng) % NUM_INTERESTING_32; + int32_t val = INTERESTING_32_VALUES[idx]; + data[offset] = val & 0xFF; + data[offset + 1] = (val >> 8) & 0xFF; + data[offset + 2] = (val >> 16) & 0xFF; + data[offset + 3] = (val >> 24) & 0xFF; + break; + } + case 8: { + uint32_t idx = curand(rng) % NUM_INTERESTING_64; + int64_t val = INTERESTING_64_VALUES[idx]; + for (int i = 0; i < 8; i++) { + data[offset + i] = (val >> (i * 8)) & 0xFF; + } + break; + } + default: + break; + } +} + +__device__ void 
GPUMutationEngine::clone_bytes(mutation_input_t* input, uint32_t src_offset,
                               uint32_t dst_offset, uint32_t count) {
    // Duplicate `count` bytes from src_offset, inserting them at dst_offset.
    if (input->length + count > input->capacity) return;

    // Shift data to make room for the insertion.
    for (int32_t i = input->length - 1; i >= (int32_t)dst_offset; i--) {
        input->data[i + count] = input->data[i];
    }

    // Copy bytes. If the source region sits at/after the insertion point it
    // has just been shifted by `count`, so compensate.
    // NOTE(review): when src_offset < dst_offset < src_offset + count the
    // source partially overlaps the shifted region — confirm the resulting
    // bytes are acceptable for fuzzing purposes.
    for (uint32_t i = 0; i < count; i++) {
        input->data[dst_offset + i] = input->data[src_offset + i + (src_offset >= dst_offset ? count : 0)];
    }

    input->length += count;
}

// Remove `count` bytes at `offset`, closing the gap.
__device__ void GPUMutationEngine::delete_bytes(mutation_input_t* input, uint32_t offset, uint32_t count) {
    if (offset + count > input->length) return;

    for (uint32_t i = offset; i + count < input->length; i++) {
        input->data[i] = input->data[i + count];
    }

    input->length -= count;
}

// Insert `count` bytes from `data` at `offset`, shifting the tail right.
__device__ void GPUMutationEngine::insert_bytes(mutation_input_t* input, uint32_t offset,
                                                const uint8_t* data, uint32_t count) {
    if (input->length + count > input->capacity) return;

    // Shift existing data
    for (int32_t i = input->length - 1; i >= (int32_t)offset; i--) {
        input->data[i + count] = input->data[i];
    }

    // Insert new data
    for (uint32_t i = 0; i < count; i++) {
        input->data[offset + i] = data[i];
    }

    input->length += count;
}

// Overwrite up to `count` bytes in place (clipped at the buffer end).
__device__ void GPUMutationEngine::overwrite_bytes(mutation_input_t* input, uint32_t offset,
                                                   const uint8_t* data, uint32_t count) {
    for (uint32_t i = 0; i < count && offset + i < input->length; i++) {
        input->data[offset + i] = data[i];
    }
}

// Exchange two `count`-byte windows (caller guarantees both are in range).
__device__ void GPUMutationEngine::swap_bytes(uint8_t* data, uint32_t offset1, uint32_t offset2, uint32_t count) {
    for (uint32_t i = 0; i < count; i++) {
        uint8_t tmp = data[offset1 + i];
        data[offset1 + i] = data[offset2 + i];
        data[offset2 + i] = tmp;
    }
}

// Fisher-Yates shuffle of a `count`-byte window.
__device__ void GPUMutationEngine::shuffle_bytes(uint8_t* data, uint32_t offset, uint32_t count, curandState* rng) {
    for (uint32_t i = count - 1; i > 0; i--) {
        uint32_t j = curand(rng) % (i + 1);
        uint8_t tmp = data[offset + i];
        data[offset + i] = data[offset + j];
        data[offset + j] = tmp;
    }
}

// Stack several random mutations. Only the first 20 mutation types are drawn
// so size-changing/complex mutators don't compound uncontrollably.
__device__ void GPUMutationEngine::havoc(mutation_input_t* input, curandState* rng, uint32_t num_mutations) {
    for (uint32_t i = 0; i < num_mutations; i++) {
        // Exclude complex mutations from havoc to avoid exponential growth
        MutationType type = (MutationType)(curand(rng) % 20);
        mutate_typed(input, type, rng);
    }
}

// AFL-style splice: prefix of src1 + suffix of src2, clipped to dst capacity.
__device__ void GPUMutationEngine::splice(mutation_input_t* dst, const mutation_input_t* src1,
                                          const mutation_input_t* src2, curandState* rng) {
    if (src1->length == 0 || src2->length == 0) return;

    uint32_t split1 = curand(rng) % src1->length;
    uint32_t split2 = curand(rng) % src2->length;

    // Take first part from src1, second part from src2
    uint32_t new_len = split1 + (src2->length - split2);
    if (new_len > dst->capacity) new_len = dst->capacity;

    for (uint32_t i = 0; i < split1 && i < new_len; i++) {
        dst->data[i] = src1->data[i];
    }
    for (uint32_t i = 0; i + split1 < new_len; i++) {
        dst->data[split1 + i] = src2->data[split2 + i];
    }

    dst->length = new_len;
}

// Two-point crossover over the common prefix of the parents.
__device__ void GPUMutationEngine::crossover(mutation_input_t* dst, const mutation_input_t* src1,
                                             const mutation_input_t* src2, curandState* rng) {
    if (src1->length == 0 || src2->length == 0) return;

    uint32_t min_len = (src1->length < src2->length) ? src1->length : src2->length;
    uint32_t pt1 = curand(rng) % min_len;
    uint32_t pt2 = pt1 + (curand(rng) % (min_len - pt1));  // pt1 <= pt2 < min_len

    dst->length = min_len;

    // Outside [pt1, pt2) take src1; inside take src2.
    for (uint32_t i = 0; i < min_len; i++) {
        if (i < pt1 || i >= pt2) {
            dst->data[i] = src1->data[i];
        } else {
            dst->data[i] = src2->data[i];
        }
    }
}

// Mutate a 32-byte ABI slot as an address: 12 zero bytes, then a 20-byte
// address drawn from the dictionary (3 in 4, when available) or at random.
__device__ void GPUMutationEngine::mutate_address(mutation_input_t* input, uint32_t offset, curandState* rng) {
    if (offset + 32 > input->length) return;

    // Address is 20 bytes, right-aligned in the 32-byte slot:
    // zero out the first 12 bytes.
    for (int i = 0; i < 12; i++) {
        input->data[offset + i] = 0;
    }

    // Prefer a known dictionary address when one exists.
    if (dictionary_->num_addresses > 0 && (curand(rng) % 4) < 3) {
        const dictionary_entry_t* entry = dictionary_->get_random(rng, DictionaryEntryType::ADDRESS);
        if (entry && entry->length >= 20) {
            for (int i = 0; i < 20; i++) {
                input->data[offset + 12 + i] = entry->data[i];
            }
            return;
        }
    }

    // Random address
    for (int i = 0; i < 20; i++) {
        input->data[offset + 12 + i] = curand(rng) & 0xFF;
    }
}

// Mutate a 32-byte ABI slot as a uint256 using one of several strategies:
// boundary values, powers of two, known-interesting constants, dictionary
// values, or pure randomness.
__device__ void GPUMutationEngine::mutate_uint256(mutation_input_t* input, uint32_t offset, curandState* rng) {
    if (offset + 32 > input->length) return;

    uint32_t strategy = curand(rng) % 10;

    switch (strategy) {
        case 0: // Zero
            for (int i = 0; i < 32; i++) input->data[offset + i] = 0;
            break;
        case 1: // One
            for (int i = 0; i < 31; i++) input->data[offset + i] = 0;
            input->data[offset + 31] = 1;
            break;
        case 2: // Max
            for (int i = 0; i < 32; i++) input->data[offset + i] = 0xFF;
            break;
        case 3: // Power of 2
        {
            for (int i = 0; i < 32; i++) input->data[offset + i] = 0;
            uint32_t bit = curand(rng) % 256;
            input->data[offset + 31 - bit / 8] = 1 << (bit % 8);
            break;
        }
        case 4: // EVM interesting value (8 little-endian 32-bit limbs)
        {
            uint32_t idx = curand(rng) % NUM_EVM_INTERESTING;
            for (int i = 0; i < 8; i++) {
                uint32_t val = EVM_INTERESTING_256[idx][i];
                input->data[offset + i*4] = val & 0xFF;
                input->data[offset + i*4 + 1] = (val >> 8) & 0xFF;
                input->data[offset + i*4 + 2] = (val >> 16) & 0xFF;
                input->data[offset + i*4 + 3] = (val >> 24) & 0xFF;
            }
            break;
        }
        case 5: // Dictionary value
            if (dictionary_->num_values > 0) {
                const dictionary_entry_t* entry = dictionary_->get_random(rng, DictionaryEntryType::UINT256_VALUE);
                if (entry && entry->length >= 32) {
                    for (int i = 0; i < 32; i++) {
                        input->data[offset + i] = entry->data[i];
                    }
                }
            }
            break;
        default: // Random
            for (int i = 0; i < 32; i++) {
                input->data[offset + i] = curand(rng) & 0xFF;
            }
            break;
    }
}

// Replace the 4-byte function selector: common ERC selectors, dictionary
// selectors, or random bytes. Keeps input->selector mirrored with data[0..3].
__device__ void GPUMutationEngine::mutate_selector(mutation_input_t* input, curandState* rng) {
    if (input->length < 4) return;

    uint32_t strategy = curand(rng) % 4;

    switch (strategy) {
        case 0: // Common selector
        {
            uint32_t idx = curand(rng) % NUM_COMMON_SELECTORS;
            for (int i = 0; i < 4; i++) {
                input->data[i] = COMMON_SELECTORS[idx][i];
                input->selector[i] = COMMON_SELECTORS[idx][i];
            }
            break;
        }
        case 1: // Dictionary selector
            if (dictionary_->num_selectors > 0) {
                const dictionary_entry_t* entry = dictionary_->get_random(rng, DictionaryEntryType::FUNCTION_SELECTOR);
                if (entry && entry->length >= 4) {
                    for (int i = 0; i < 4; i++) {
                        input->data[i] = entry->data[i];
                        input->selector[i] = entry->data[i];
                    }
                }
            }
            break;
        default: // Random selector
            for (int i = 0; i < 4; i++) {
                input->data[i] = curand(rng) & 0xFF;
                input->selector[i] = input->data[i];
            }
            break;
    }
}

// ABI-aware calldata mutation: pick one decoded parameter and mutate it by
// its detected type; falls back to a generic mutation when ABI info is absent.
__device__ void GPUMutationEngine::mutate_calldata(mutation_input_t* input, curandState* rng) {
    if (!abi_aware_ || input->num_params == 0) {
        // Random mutation if not ABI-aware
        mutate(input, rng);
        return;
    }

    // Pick a random parameter to mutate
    uint32_t param_idx = curand(rng) % input->num_params;
    uint32_t offset = input->param_offsets[param_idx];
    abi::ABIType type = (abi::ABIType)input->param_types[param_idx];

    abi::mutate_by_type(input->data, offset, type, rng);
}

// Mutate the transaction value (in wei): zero, small, 1 ETH, max, or random.
__device__ void GPUMutationEngine::mutate_value(mutation_input_t* input, curandState* rng) {
    uint32_t strategy = curand(rng) % 6;

    switch (strategy) {
        case 0: // Zero
            for (int i = 0; i < 8; i++) input->value._limbs[i] = 0;
            break;
        case 1: // Small value
        {
            for (int i = 1; i < 8; i++) input->value._limbs[i] = 0;
            input->value._limbs[0] = curand(rng) % 1000;
            break;
        }
        case 2: // 1 ETH (10^18 wei)
        {
            for (int i = 2; i < 8; i++) input->value._limbs[i] = 0;
            // FIX: 10^18 = 0x0DE0B6B3A7640000. The previous low limb
            // (0x4A817C80) did not correspond to 10^18.
            input->value._limbs[0] = 0xA7640000; // 10^18 low 32 bits
            input->value._limbs[1] = 0x0DE0B6B3; // 10^18 high 32 bits
            break;
        }
        case 3: // Max available (simulated)
            for (int i = 0; i < 8; i++) input->value._limbs[i] = 0xFFFFFFFF;
            break;
        default: // Random
        {
            for (int i = 0; i < 8; i++) {
                input->value._limbs[i] = curand(rng);
            }
            break;
        }
    }
}

// Mutate the gas limit between min/standard/high presets and random values.
__device__ void GPUMutationEngine::mutate_gas(mutation_input_t* input, curandState* rng) {
    uint32_t strategy = curand(rng) % 4;

    // Clear high bits
    for (int i = 2; i < 8; i++) input->gas_limit._limbs[i] = 0;

    switch (strategy) {
        case 0: // Minimum gas (intrinsic tx cost)
            input->gas_limit._limbs[0] = 21000;
            input->gas_limit._limbs[1] = 0;
            break;
        case 1: // Standard gas limit
            input->gas_limit._limbs[0] = 3000000;
            input->gas_limit._limbs[1] = 0;
            break;
        case 2: // High gas
            input->gas_limit._limbs[0] = 30000000;
            input->gas_limit._limbs[1] = 0;
            break;
        default: // Random
            input->gas_limit._limbs[0] = curand(rng) % 50000000;
            input->gas_limit._limbs[1] = 0;
            break;
    }
}

// Mutate the sender address: dictionary address (2 in 3, when available) or
// random. Addresses occupy limbs 0..4 (20 bytes, little-endian limbs).
__device__ void GPUMutationEngine::mutate_sender(mutation_input_t* input, curandState* rng) {
    // Clear limbs above the 20-byte address.
    // FIX: the former `_limbs[4] &= 0xFFFF` mask contradicted its own comment
    // ("only low 4 bytes" would be a no-op) and was always overwritten by one
    // of the two branches below, so it has been removed.
    for (int i = 5; i < 8; i++) input->sender._limbs[i] = 0;

    if (dictionary_->num_addresses > 0 && (curand(rng) % 3) < 2) {
        const dictionary_entry_t* entry = dictionary_->get_random(rng, DictionaryEntryType::ADDRESS);
        if (entry && entry->length >= 20) {
            // Copy address to sender (little-endian limbs)
            for (int i = 0; i < 5; i++) {
                input->sender._limbs[i] =
                    entry->data[i*4] | (entry->data[i*4+1] << 8) |
                    (entry->data[i*4+2] << 16) | (entry->data[i*4+3] << 24);
            }
            return;
        }
    }

    // Generate random sender
    for (int i = 0; i < 5; i++) {
        input->sender._limbs[i] = curand(rng);
    }
}

// Randomize one block-context field: number, timestamp, basefee, prevrandao.
__device__ void GPUMutationEngine::mutate_block_context(mutation_input_t* input, curandState* rng) {
    uint32_t field = curand(rng) % 4;

    switch (field) {
        case 0: // Block number (roughly post-merge mainnet range)
            input->block_number._limbs[0] = 15000000 + (curand(rng) % 5000000);
            for (int i = 1; i < 8; i++) input->block_number._limbs[i] = 0;
            break;
        case 1: // Timestamp
            // Current-ish timestamp
            input->timestamp._limbs[0] = 1700000000 + (curand(rng) % 100000000);
            for (int i = 1; i < 8; i++) input->timestamp._limbs[i] = 0;
            break;
        case 2: // Basefee
        {
            // FIX: curand() yields only 32 bits (max ~4.3 gwei in wei), so the
            // old `curand(rng) % 1000000000000` could never reach the intended
            // 1000 Gwei. Compose 64 random bits, then reduce.
            uint64_t bf = ((uint64_t)curand(rng) << 32) | (uint64_t)curand(rng);
            bf %= 1000000000000ULL; // Up to 1000 Gwei (10^12 wei)
            input->basefee._limbs[0] = (uint32_t)(bf & 0xFFFFFFFF);
            input->basefee._limbs[1] = (uint32_t)(bf >> 32);
            for (int i = 2; i < 8; i++) input->basefee._limbs[i] = 0;
            break;
        }
        case 3: // Prevrandao: full 256-bit random
            for (int i = 0; i < 8; i++) {
                input->prevrandao._limbs[i] = curand(rng);
            }
            break;
    }
}

// Forward a new entry into the shared mutation dictionary.
__host__ __device__ void GPUMutationEngine::add_to_dictionary(const uint8_t* data, uint8_t length,
                                                              DictionaryEntryType type, uint32_t pc) {
    dictionary_->add_entry(data, length, type, pc);
}

// Splice a random dictionary entry into the input (overwrite or insert).
__device__ void GPUMutationEngine::apply_dictionary(mutation_input_t* input, curandState* rng) {
    const dictionary_entry_t* entry = dictionary_->get_random(rng);
    if (!entry) return;

    uint32_t offset = select_offset(input->length, rng);

    // Overwrite or insert based on a coin flip
    if (curand(rng) % 2 == 0) {
        // Overwrite
        overwrite_bytes(input, offset, entry->data, entry->length);
    } else {
        // Insert if space available
        if (input->length + entry->length <= input->capacity) {
            insert_bytes(input, offset, entry->data, entry->length);
        }
    }
}

// (return type of the next definition; its signature continues in the next chunk)
__device__ void
GPUMutationEngine::gradient_mutate(mutation_input_t* input, uint32_t target_offset,
                                   bool increase, curandState* rng) {
    // Gradient-guided nudging: move the low 64 bits (bytes 24..31, stored
    // little-endian here) of the 32-byte slot toward the target direction.
    if (target_offset + 32 > input->length) return;

    uint32_t delta = 1 + (curand(rng) % 16);

    if (increase) {
        // Try to increase value
        uint64_t val = 0;
        for (int i = 0; i < 8; i++) {
            val |= ((uint64_t)input->data[target_offset + 24 + i]) << (i * 8);
        }
        val += delta;
        for (int i = 0; i < 8; i++) {
            input->data[target_offset + 24 + i] = (val >> (i * 8)) & 0xFF;
        }
    } else {
        // Try to decrease value (clamped at zero)
        uint64_t val = 0;
        for (int i = 0; i < 8; i++) {
            val |= ((uint64_t)input->data[target_offset + 24 + i]) << (i * 8);
        }
        if (val >= delta) val -= delta;
        for (int i = 0; i < 8; i++) {
            input->data[target_offset + 24 + i] = (val >> (i * 8)) & 0xFF;
        }
    }
}

// Replace the whole 64-entry mutation weight table.
__host__ void GPUMutationEngine::set_mutation_weights(const uint8_t* weights) {
    memcpy(mutation_weights_, weights, 64);
}

// Cap the number of stacked mutations per input.
__host__ void GPUMutationEngine::set_max_mutations(uint32_t max) {
    max_mutations_ = max;
}

// Toggle ABI-aware calldata mutation.
__host__ void GPUMutationEngine::enable_abi_aware(bool enable) {
    abi_aware_ = enable;
}

// Launch a batch mutation kernel over `num_inputs` inputs on `stream`.
__host__ void GPUMutationEngine::mutate_batch(mutation_input_t* inputs, uint32_t num_inputs,
                                              uint32_t mutations_per_input, cudaStream_t stream) {
    mutation_result_t* results;
    cudaMalloc(&results, num_inputs * mutations_per_input * sizeof(mutation_result_t));

    uint32_t block_size = 256;
    uint32_t num_blocks = (num_inputs + block_size - 1) / block_size;

    // FIX: the launch configuration had been mangled to an empty "<<>>";
    // restore it, and actually launch on the caller-supplied stream.
    kernel_mutate_batch<<<num_blocks, block_size, 0, stream>>>(
        this, inputs, num_inputs, mutations_per_input, rng_state_.states, results
    );

    // FIX: the kernel writes `results` asynchronously; freeing it without
    // synchronizing first is a use-after-free on the device allocation.
    cudaStreamSynchronize(stream);
    // NOTE(review): `results` is discarded unread — presumably a placeholder
    // until per-mutation stats are consumed; confirm and plumb it out if so.
    cudaFree(results);
}

// ============================================================================
// Sequence Mutator Implementation
// ============================================================================

// Reset a sequence to empty with the given transaction capacity.
__host__ __device__ void sequence_t::init(uint32_t max_txs) {
    capacity = max_txs;
    num_transactions = 0;
    seed = 0;
}

// Append a transaction (no-op when full); tx_index is kept consistent.
__host__ __device__ void sequence_t::add_transaction(const transaction_t& tx) {
    if (num_transactions < capacity) {
        transactions[num_transactions] = tx;
        transactions[num_transactions].tx_index = num_transactions;
        num_transactions++;
    }
}

// Remove the transaction at `index`, shifting the tail left and renumbering.
__host__ __device__ void sequence_t::remove_transaction(uint32_t index) {
    if (index >= num_transactions) return;
    for (uint32_t i = index; i < num_transactions - 1; i++) {
        transactions[i] = transactions[i + 1];
        transactions[i].tx_index = i;
    }
    num_transactions--;
}

// Move the transaction at `from` to position `to`, rotating the range
// between them and renumbering tx_index along the way.
__host__ __device__ void sequence_t::reorder(uint32_t from, uint32_t to) {
    if (from >= num_transactions || to >= num_transactions || from == to) return;
    transaction_t tmp = transactions[from];
    if (from < to) {
        for (uint32_t i = from; i < to; i++) {
            transactions[i] = transactions[i + 1];
            transactions[i].tx_index = i;
        }
    } else {
        for (uint32_t i = from; i > to; i--) {
            transactions[i] = transactions[i - 1];
            transactions[i].tx_index = i;
        }
    }
    transactions[to] = tmp;
    transactions[to].tx_index = to;
}

// Copy another sequence, truncating to this sequence's capacity.
__host__ __device__ void sequence_t::copy_from(const sequence_t& other) {
    num_transactions = (other.num_transactions < capacity) ? other.num_transactions : capacity;
    seed = other.seed;
    for (uint32_t i = 0; i < num_transactions; i++) {
        transactions[i] = other.transactions[i];
    }
}

// The sequence mutator delegates per-transaction mutation to the engine.
__host__ SequenceMutator::SequenceMutator(GPUMutationEngine* engine) : engine_(engine) {}

// Apply one random sequence-level operation (mutate / swap / duplicate /
// delete / reorder / sender pattern / value flow / mutate-all).
__device__ void SequenceMutator::mutate_sequence(sequence_t* seq, curandState* rng) {
    if (seq->num_transactions == 0) return;

    uint32_t operation = curand(rng) % 8;

    switch (operation) {
        case 0: // Mutate random transaction
            mutate_transaction(seq, curand(rng) % seq->num_transactions, rng);
            break;
        case 1: // Swap two transactions
            if (seq->num_transactions > 1) {
                swap_transactions(seq, curand(rng) % seq->num_transactions,
                                  curand(rng) % seq->num_transactions);
            }
            break;
        case 2: // Duplicate transaction
            if (seq->num_transactions < seq->capacity) {
                duplicate_transaction(seq, curand(rng) % seq->num_transactions);
            }
            break;
        case 3: // Delete transaction (never empties the sequence)
            if (seq->num_transactions > 1) {
                delete_transaction(seq, curand(rng) % seq->num_transactions);
            }
            break;
        case 4: // Reorder
            if (seq->num_transactions > 1) {
                seq->reorder(curand(rng) % seq->num_transactions,
                             curand(rng) % seq->num_transactions);
            }
            break;
        case 5: // Mutate sender pattern
            mutate_sender_pattern(seq, rng);
            break;
        case 6: // Mutate value flow
            mutate_value_flow(seq, rng);
            break;
        default: // Mutate all transactions
            for (uint32_t i = 0; i < seq->num_transactions; i++) {
                mutate_transaction(seq, i, rng);
            }
            break;
    }
}

// Insert a new transaction at `index`, seeded from its left neighbor (when
// one exists) and then mutated.
__device__ void SequenceMutator::insert_transaction(sequence_t* seq, uint32_t index, curandState* rng) {
    if (seq->num_transactions >= seq->capacity) return;

    // Shift transactions right to open the slot.
    for (uint32_t i = seq->num_transactions; i > index; i--) {
        seq->transactions[i] = seq->transactions[i - 1];
        seq->transactions[i].tx_index = i;
    }

    // Create new transaction (copy from adjacent and mutate)
    if (index > 0) {
        seq->transactions[index] = seq->transactions[index - 1];
    }
    seq->transactions[index].tx_index = index;
    seq->num_transactions++;

    engine_->mutate(&seq->transactions[index].input, rng);
}

// Remove one transaction (thin wrapper over sequence_t).
__device__ void SequenceMutator::delete_transaction(sequence_t* seq, uint32_t index) {
    seq->remove_transaction(index);
}

// Append a copy of the transaction at `index`.
__device__ void SequenceMutator::duplicate_transaction(sequence_t* seq, uint32_t index) {
    if (seq->num_transactions >= seq->capacity || index >= seq->num_transactions) return;

    seq->transactions[seq->num_transactions] = seq->transactions[index];
    seq->transactions[seq->num_transactions].tx_index = seq->num_transactions;
    seq->num_transactions++;
}

// Exchange two transactions and fix their tx_index fields.
__device__ void SequenceMutator::swap_transactions(sequence_t* seq, uint32_t idx1, uint32_t idx2) {
    if (idx1 >= seq->num_transactions || idx2 >= seq->num_transactions) return;

    transaction_t tmp = seq->transactions[idx1];
    seq->transactions[idx1] = seq->transactions[idx2];
    seq->transactions[idx2] = tmp;

    seq->transactions[idx1].tx_index = idx1;
    seq->transactions[idx2].tx_index = idx2;
}

// Splice two sequences: prefix of src1 + suffix of src2, clipped to capacity.
__device__ void SequenceMutator::splice_sequences(sequence_t* dst, const sequence_t* src1,
                                                  const sequence_t* src2, curandState* rng) {
    if (src1->num_transactions == 0 || src2->num_transactions == 0) return;

    uint32_t split1 = curand(rng) % src1->num_transactions;
    uint32_t split2 = curand(rng) % src2->num_transactions;

    dst->num_transactions = 0;

    // Copy first part from src1
    for (uint32_t i = 0; i < split1 && dst->num_transactions < dst->capacity; i++) {
        dst->transactions[dst->num_transactions] = src1->transactions[i];
        dst->transactions[dst->num_transactions].tx_index = dst->num_transactions;
        dst->num_transactions++;
    }

    // Copy second part from src2
    for (uint32_t i = split2; i < src2->num_transactions && dst->num_transactions < dst->capacity; i++) {
        dst->transactions[dst->num_transactions] = src2->transactions[i];
        dst->transactions[dst->num_transactions].tx_index = dst->num_transactions;
        dst->num_transactions++;
    }
}

// Mutate a single transaction's input via the engine.
__device__ void SequenceMutator::mutate_transaction(sequence_t* seq, uint32_t tx_index, curandState* rng) {
    if (tx_index >= seq->num_transactions) return;

    engine_->mutate(&seq->transactions[tx_index].input, rng);
}

// Assign one fresh random sender (20 bytes, limbs 0..4) to every transaction.
__device__ void SequenceMutator::mutate_sender_pattern(sequence_t* seq, curandState* rng) {
    // Apply same sender mutation across all transactions
    evm_word_t new_sender;
    for (int i = 0; i < 5; i++) new_sender._limbs[i] = curand(rng);
    for (int i = 5; i < 8; i++) new_sender._limbs[i] = 0;

    for (uint32_t i = 0; i < seq->num_transactions; i++) {
        for (int j = 0; j < 8; j++) {
            seq->transactions[i].input.sender._limbs[j] = new_sender._limbs[j];
        }
    }
}

// Impose a monotone value pattern (ascending or descending) across the
// sequence. NOTE(review): the descending branch can underflow uint64 and wrap
// to huge values — confirm whether that extreme is intentional fuzz input.
__device__ void SequenceMutator::mutate_value_flow(sequence_t* seq, curandState* rng) {
    // Create ascending/descending value pattern
    bool ascending = curand(rng) % 2;
    uint64_t base_value = curand(rng) % 1000000;
    uint64_t delta = curand(rng) % 10000;

    for (uint32_t i = 0; i < seq->num_transactions; i++) {
        uint64_t value = ascending ? (base_value + i * delta) : (base_value - i * delta);
        seq->transactions[i].input.value._limbs[0] = value & 0xFFFFFFFF;
        seq->transactions[i].input.value._limbs[1] = (value >> 32) & 0xFFFFFFFF;
        for (int j = 2; j < 8; j++) {
            seq->transactions[i].input.value._limbs[j] = 0;
        }
    }
}

// ============================================================================
// ABI Helper Implementations
// ============================================================================

namespace abi {

// Heuristically classify a 32-byte ABI head slot by its leading zero bytes.
__device__ ABIType detect_param_type(const uint8_t* data, uint32_t offset, uint32_t length) {
    if (offset + 32 > length) return ABIType::UINT256;

    // Count leading zero bytes
    uint32_t leading_zeros = 0;
    for (uint32_t i = 0; i < 32 && data[offset + i] == 0; i++) {
        leading_zeros++;
    }

    // FIX: test the most specific (narrowest) widths first. In the previous
    // ordering the ADDRESS branch (>= 12 leading zeros) shadowed every
    // narrower integer branch, making them unreachable dead code.
    if (leading_zeros == 32) return ABIType::UINT256;  // all-zero: keep widest
    if (leading_zeros >= 31) return ABIType::UINT8;
    if (leading_zeros >= 30) return ABIType::UINT16;
    if (leading_zeros >= 28) return ABIType::UINT32;
    if (leading_zeros >= 24) return ABIType::UINT64;
    if (leading_zeros >= 12) return ABIType::ADDRESS;  // 20-byte, left-padded

    return ABIType::UINT256;
}

// Canonical byte width of an ABI scalar type (before 32-byte padding).
__device__ uint32_t get_type_size(ABIType type) {
    switch (type) {
        case ABIType::UINT8:
        case ABIType::INT8:
        case ABIType::BOOL:
        case ABIType::BYTES1:
            return 1;
        case ABIType::UINT16:
        case ABIType::INT16:
        case ABIType::BYTES2:
            return 2;
        case ABIType::UINT32:
        case ABIType::INT32:
        case ABIType::BYTES4:
        case ABIType::FUNCTION:
            return 4;
        case ABIType::UINT64:
        case ABIType::INT64:
        case ABIType::BYTES8:
            return 8;
        case ABIType::UINT128:
        case ABIType::INT128:
        case ABIType::BYTES16:
            return 16;
        case ABIType::ADDRESS:
            return 20;
        case ABIType::UINT256:
        case ABIType::INT256:
        case ABIType::BYTES32:
        default:
            return 32;
    }
}

// Type-directed mutation of one 32-byte ABI slot (body continues below).
__device__ void mutate_by_type(uint8_t* data, uint32_t offset, ABIType type, curandState* rng) {
    uint32_t strategy = curand(rng) % 4;

    // Dispatch on the slot's ABI type; `strategy` (drawn above) picks between
    // zero / max / random for the integer-like cases.
    switch (type) {
        case ABIType::ADDRESS:
            // Zero prefix, then 20 random bytes
            for (int i = 0; i < 12; i++) data[offset + i] = 0;
            for (int i = 12; i < 32; i++) data[offset + i] = curand(rng) & 0xFF;
            break;

        case ABIType::BOOL:
            // Canonical bool encoding: 31 zero bytes + 0/1.
            for (int i = 0; i < 31; i++) data[offset + i] = 0;
            data[offset + 31] = curand(rng) % 2;
            break;

        case ABIType::UINT8:
        case ABIType::INT8:
            // Single meaningful byte at the end of the slot.
            for (int i = 0; i < 31; i++) data[offset + i] = 0;
            if (strategy == 0) data[offset + 31] = 0;
            else if (strategy == 1) data[offset + 31] = 0xFF;
            else data[offset + 31] = curand(rng) & 0xFF;
            break;

        case ABIType::UINT256:
        case ABIType::INT256:
        case ABIType::BYTES32:
        default:
            if (strategy == 0) {
                // Zero
                for (int i = 0; i < 32; i++) data[offset + i] = 0;
            } else if (strategy == 1) {
                // Max
                for (int i = 0; i < 32; i++) data[offset + i] = 0xFF;
            } else {
                // Random
                for (int i = 0; i < 32; i++) data[offset + i] = curand(rng) & 0xFF;
            }
            break;
    }
}

// Fresh-value generation delegates to the mutator (same strategies apply).
__device__ void generate_by_type(uint8_t* data, uint32_t offset, ABIType type, curandState* rng) {
    mutate_by_type(data, offset, type, rng); // Same logic for generation
}

// Selector -> parameter-type lookup. Currently a stub: always reports the
// selector as unknown.
__device__ bool lookup_selector(const uint8_t* selector, ABIType* param_types, uint32_t* num_params) {
    // This would normally require a full selector database
    // For now, return false (unknown selector)
    return false;
}

} // namespace abi

// ============================================================================
// CUDA Kernel Implementations
// ============================================================================

// One thread seeds one curandState from (seed, thread index).
__global__ void kernel_init_rng(curandState* states, uint32_t num_states, uint64_t seed) {
    uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= num_states) return;

    curand_init(seed, idx, 0, &states[idx]);
}

// One thread mutates one input `mutations_per_input` times, optionally
// recording each mutation_result_t into `results`.
__global__ void kernel_mutate_batch(
    GPUMutationEngine* engine,
    mutation_input_t* inputs,
    uint32_t num_inputs,
    uint32_t mutations_per_input,
    curandState* rng_states,
    mutation_result_t* results
) {
    uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= num_inputs) return;

    curandState* rng = &rng_states[idx];

    for (uint32_t m = 0; m < mutations_per_input; m++) {
        mutation_result_t result = engine->mutate(&inputs[idx], rng);
        if (results) {
            results[idx * mutations_per_input + m] = result;
        }
    }
}

// One thread runs `havoc_iterations` stacked mutations on one input.
__global__ void kernel_havoc_batch(
    GPUMutationEngine* engine,
    mutation_input_t* inputs,
    uint32_t num_inputs,
    uint32_t havoc_iterations,
    curandState* rng_states
) {
    uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= num_inputs) return;

    curandState* rng = &rng_states[idx];
    engine->havoc(&inputs[idx], rng, havoc_iterations);
}

// One thread splices one (src1[idx], src2[idx]) pair into dst[idx].
__global__ void kernel_splice_batch(
    GPUMutationEngine* engine,
    mutation_input_t* dst,
    const mutation_input_t* src1,
    const mutation_input_t* src2,
    uint32_t num_pairs,
    curandState* rng_states
) {
    uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= num_pairs) return;

    curandState* rng = &rng_states[idx];
    engine->splice(&dst[idx], &src1[idx], &src2[idx], rng);
}

// One thread applies one sequence-level mutation to one sequence.
__global__ void kernel_mutate_sequences(
    SequenceMutator* mutator,
    sequence_t* sequences,
    uint32_t num_sequences,
    curandState* rng_states
) {
    uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= num_sequences) return;

    curandState* rng = &rng_states[idx];
    mutator->mutate_sequence(&sequences[idx], rng);
}

// ============================================================================
// Host Helper Functions
// ============================================================================

// Allocate `num_inputs` managed mutation_input_t records plus a `max_size`
// managed data buffer for each, then initialize them.
// NOTE(review): assumes mutation_input_t::init(max_size) preserves the `data`
// pointer assigned just before it — confirm against the struct definition.
__host__ void allocate_mutation_inputs(mutation_input_t** inputs, uint32_t num_inputs, uint32_t max_size) {
    cudaMallocManaged(inputs, num_inputs * sizeof(mutation_input_t));

    for (uint32_t i = 0; i < num_inputs; i++) {
        cudaMallocManaged(&(*inputs)[i].data, max_size);
        (*inputs)[i].init(max_size);
    }
}
+__host__ void free_mutation_inputs(mutation_input_t* inputs, uint32_t num_inputs) { + for (uint32_t i = 0; i < num_inputs; i++) { + if (inputs[i].data) { + cudaFree(inputs[i].data); + } + } + cudaFree(inputs); +} + +__host__ void allocate_sequences(sequence_t** sequences, uint32_t num_sequences, uint32_t max_txs) { + cudaMallocManaged(sequences, num_sequences * sizeof(sequence_t)); + + for (uint32_t i = 0; i < num_sequences; i++) { + cudaMallocManaged(&(*sequences)[i].transactions, max_txs * sizeof(transaction_t)); + (*sequences)[i].init(max_txs); + } +} + +__host__ void free_sequences(sequence_t* sequences, uint32_t num_sequences) { + for (uint32_t i = 0; i < num_sequences; i++) { + if (sequences[i].transactions) { + cudaFree(sequences[i].transactions); + } + } + cudaFree(sequences); +} + +} // namespace fuzzing +} // namespace CuEVM diff --git a/CuEVM/src/fuzzing/oracle.cu b/CuEVM/src/fuzzing/oracle.cu new file mode 100644 index 0000000..24be320 --- /dev/null +++ b/CuEVM/src/fuzzing/oracle.cu @@ -0,0 +1,1289 @@ +// CuEVM: CUDA Ethereum Virtual Machine implementation +// Comprehensive Oracle and Bug Detection Implementation +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include + +namespace CuEVM { +namespace fuzzing { + +// EVM Opcodes for reference +constexpr uint8_t OP_ADD = 0x01; +constexpr uint8_t OP_MUL = 0x02; +constexpr uint8_t OP_SUB = 0x03; +constexpr uint8_t OP_DIV = 0x04; +constexpr uint8_t OP_SDIV = 0x05; +constexpr uint8_t OP_MOD = 0x06; +constexpr uint8_t OP_SMOD = 0x07; +constexpr uint8_t OP_EXP = 0x0A; +constexpr uint8_t OP_SLOAD = 0x54; +constexpr uint8_t OP_SSTORE = 0x55; +constexpr uint8_t OP_CALL = 0xF1; +constexpr uint8_t OP_CALLCODE = 0xF2; +constexpr uint8_t OP_DELEGATECALL = 0xF4; +constexpr uint8_t OP_STATICCALL = 0xFA; +constexpr uint8_t OP_CREATE = 0xF0; +constexpr uint8_t OP_CREATE2 = 0xF5; +constexpr uint8_t OP_SELFDESTRUCT = 0xFF; +constexpr uint8_t OP_ORIGIN = 0x32; +constexpr uint8_t OP_CALLER = 0x33; + +// 
// ============================================================================
// Helper Functions for 256-bit Arithmetic
// ============================================================================
// evm_word_t is treated here as 8 x 32-bit limbs, least-significant limb
// first (limb order inferred from the carry loops later in this file).

// True iff every limb of `val` is zero.
__host__ __device__ bool is_zero(const evm_word_t& val) {
    for (int i = 0; i < 8; i++) {
        if (val._limbs[i] != 0) return false;
    }
    return true;
}

// Limb-wise equality.
__host__ __device__ bool equals(const evm_word_t& a, const evm_word_t& b) {
    for (int i = 0; i < 8; i++) {
        if (a._limbs[i] != b._limbs[i]) return false;
    }
    return true;
}

// Unsigned 256-bit a < b, scanning from the most-significant limb down.
__host__ __device__ bool less_than(const evm_word_t& a, const evm_word_t& b) {
    for (int i = 7; i >= 0; i--) {
        if (a._limbs[i] < b._limbs[i]) return true;
        if (a._limbs[i] > b._limbs[i]) return false;
    }
    return false;
}

// Unsigned 256-bit a > b.
// FIX(review): expressed as less_than(b, a) instead of a duplicated loop —
// identical behavior, single source of truth for the comparison.
__host__ __device__ bool greater_than(const evm_word_t& a, const evm_word_t& b) {
    return less_than(b, a);
}

__host__ __device__ void copy_word(evm_word_t& dst, const evm_word_t& src) {
    for (int i = 0; i < 8; i++) {
        dst._limbs[i] = src._limbs[i];
    }
}

__host__ __device__ void zero_word(evm_word_t& val) {
    for (int i = 0; i < 8; i++) {
        val._limbs[i] = 0;
    }
}

// Cheap 64-bit mixing hash over the limbs (xor-fold + rotate). Used only for
// bug-dedup signatures — not collision-resistant, not for security.
__host__ __device__ uint64_t hash_word(const evm_word_t& val) {
    uint64_t hash = 0;
    for (int i = 0; i < 8; i++) {
        hash ^= ((uint64_t)val._limbs[i]) << ((i & 1) * 32);
        hash = (hash << 7) | (hash >> 57);
    }
    return hash;
}

// ============================================================================
// Oracle Configuration Implementation
// ============================================================================

// Default profile: the commonly useful detectors on, niche/noisy ones
// (read-only reentrancy, ERC-721) off; LOW severity floor.
__host__ __device__ void oracle_config_t::set_default() {
    check_overflow = true;
    check_underflow = true;
    check_div_zero = true;
    check_unauthorized_access = true;
    check_tx_origin = true;
    check_selfdestruct = true;
    check_reentrancy = true;
    check_cross_function_reentrancy = true;
    check_read_only_reentrancy = false;
    check_erc20_issues = true;
    check_erc721_issues = false;
    check_ether_leak = true;
    check_stuck_ether = true;
    check_force_feed = true;
    check_gas_issues = true;
    min_severity = BugSeverity::LOW;
    max_bugs_per_type = MAX_BUGS_PER_TYPE;
    dedup_window_size = 1024;
}

// Everything on, including informational findings. Noisy; for deep audits.
__host__ __device__ void oracle_config_t::enable_all() {
    check_overflow = true;
    check_underflow = true;
    check_div_zero = true;
    check_unauthorized_access = true;
    check_tx_origin = true;
    check_selfdestruct = true;
    check_reentrancy = true;
    check_cross_function_reentrancy = true;
    check_read_only_reentrancy = true;
    check_erc20_issues = true;
    check_erc721_issues = true;
    check_ether_leak = true;
    check_stuck_ether = true;
    check_force_feed = true;
    check_gas_issues = true;
    min_severity = BugSeverity::INFORMATIONAL;
    max_bugs_per_type = MAX_BUGS_PER_TYPE;
    dedup_window_size = 1024;
}

// Minimal, high-signal profile: only the critical detectors, HIGH severity
// floor, smaller per-type and dedup limits.
__host__ __device__ void oracle_config_t::set_minimal() {
    check_overflow = true;
    check_underflow = true;
    check_div_zero = false;
    check_unauthorized_access = false;
    check_tx_origin = false;
    check_selfdestruct = true;
    check_reentrancy = true;
    check_cross_function_reentrancy = false;
    check_read_only_reentrancy = false;
    check_erc20_issues = false;
    check_erc721_issues = false;
    check_ether_leak = true;
    check_stuck_ether = false;
    check_force_feed = false;
    check_gas_issues = false;
    min_severity = BugSeverity::HIGH;
    max_bugs_per_type = 64;
    dedup_window_size = 256;
}

// ============================================================================
// Bug Storage Implementation
// ============================================================================

// Reset all counters and the dedup signature ring.
__host__ __device__ void bug_storage_t::init() {
    bug_count = 0;
    signature_idx = 0;
    for (int i = 0; i <= (int)BugType::UNKNOWN; i++) {
        type_counts[i] = 0;
    }
    for (int i = 0; i < 1024; i++) {
        recent_signatures[i] = 0;
    }
}

// (qualifiers continued by bug_storage_t::is_duplicate on the next line)
__host__
__device__ bool bug_storage_t::is_duplicate(uint64_t signature) {
    // Linear scan of the full signature ring.
    // NOTE(review): always scans all 1024 slots; oracle_config_t::dedup_window_size
    // is not visible from this struct and is not applied here — confirm intended.
    for (uint32_t i = 0; i < 1024; i++) {
        if (recent_signatures[i] == signature) {
            return true;
        }
    }
    return false;
}

// Record a bug if it is not a duplicate and no capacity limit is hit.
// Returns true when the bug was stored.
__host__ __device__ bool bug_storage_t::add_bug(const detected_bug_t& bug) {
    // Compute signature for deduplication (type, pc, first operand).
    uint64_t signature = hash_word(bug.context.operand1) ^
                         ((uint64_t)bug.type << 56) ^
                         ((uint64_t)bug.location.pc << 32);

    // Check for duplicate
    if (is_duplicate(signature)) {
        return false;
    }

    // Check if we have space
    if (bug_count >= MAX_BUGS_TOTAL) {
        return false;
    }

    // Check per-type limit
    // NOTE(review): on device this pre-check is non-atomic; concurrent threads
    // can each pass it and slightly exceed MAX_BUGS_PER_TYPE — confirm acceptable.
    if (type_counts[(int)bug.type] >= MAX_BUGS_PER_TYPE) {
        return false;
    }

    // Add bug
#ifdef __CUDA_ARCH__
    // Device path: reserve a slot atomically; roll back on overshoot.
    uint32_t idx = atomicAdd(&bug_count, 1);
    if (idx >= MAX_BUGS_TOTAL) {
        atomicSub(&bug_count, 1);
        return false;
    }
    atomicAdd(&type_counts[(int)bug.type], 1);
#else
    // Host path: single-threaded, plain increments.
    uint32_t idx = bug_count++;
    type_counts[(int)bug.type]++;
#endif

    bugs[idx] = bug;

    // Add to dedup window.
    // NOTE(review): signature_idx update is not atomic on device; concurrent
    // writers may overwrite the same ring slot (dedup is best-effort).
    recent_signatures[signature_idx % 1024] = signature;
    signature_idx++;

    return true;
}

// Number of stored bugs of exactly this type.
__host__ __device__ uint32_t bug_storage_t::count_by_type(BugType type) {
    return type_counts[(int)type];
}

// Number of stored bugs at or above the given severity.
__host__ __device__ uint32_t bug_storage_t::count_by_severity(BugSeverity severity) {
    uint32_t count = 0;
    for (uint32_t i = 0; i < bug_count; i++) {
        if (bugs[i].severity >= severity) count++;
    }
    return count;
}

__host__ __device__ void bug_storage_t::clear() {
    init();
}

// ============================================================================
// Execution State Tracker Implementation
// ============================================================================

// Reset per-transaction execution tracking state.
__host__ __device__ void execution_state_tracker_t::init() {
    call_depth = 0;
    num_storage_writes = 0;
    num_tracked_addresses = 0;
    in_external_call = false;
    state_modified_before_call = false;
    reentrancy_guard_slot = 0;
    initial_gas = 0;
    gas_used = 0;
    last_call_success = false;
    last_call_checked = true;
}

// Push a call frame; frames beyond MAX_CALL_DEPTH are silently dropped.
__host__ __device__ void execution_state_tracker_t::push_call(const call_frame_t& frame) {
    if (call_depth < MAX_CALL_DEPTH) {
        call_stack[call_depth] = frame;
        call_depth++;
        if (frame.is_external) {
            in_external_call = true;
        }
    }
}

// Pop a call frame; in_external_call clears only when depth returns to 0.
__host__ __device__ void execution_state_tracker_t::pop_call() {
    if (call_depth > 0) {
        call_depth--;
        if (call_depth == 0) {
            in_external_call = false;
        }
    }
}

// Record an SSTORE; also marks state as modified for reentrancy heuristics.
// Writes beyond MAX_STORAGE_WRITES are silently dropped.
__host__ __device__ void execution_state_tracker_t::record_storage_write(const storage_write_t& write) {
    if (num_storage_writes < MAX_STORAGE_WRITES) {
        storage_writes[num_storage_writes++] = write;
        state_modified_before_call = true;
    }
}

// Heuristic: reentrancy is flagged when we are inside an external call and
// some storage write happened at a shallower call depth than the current one.
__host__ __device__ bool execution_state_tracker_t::check_reentrancy() {
    // Check if we're in an external call and state was modified before
    if (in_external_call && state_modified_before_call) {
        // Check if any storage was written before the call and after
        for (uint32_t i = 0; i < num_storage_writes; i++) {
            if (storage_writes[i].call_depth < call_depth) {
                // Storage write happened before current call depth
                return true; // Potential reentrancy
            }
        }
    }
    return false;
}

// Track the latest observed balance for an address (up to 64 addresses).
// NOTE(review): addresses are keyed by storing them in `initial_balances`
// and values in `current_balances` — the initial balance itself is never
// retained. Looks like a field misuse (missing tracked_addresses array?);
// confirm against the struct definition.
__host__ __device__ void execution_state_tracker_t::track_balance(const evm_word_t& address,
                                                                  const evm_word_t& balance) {
    // Find existing or add new
    for (uint32_t i = 0; i < num_tracked_addresses; i++) {
        if (equals(initial_balances[i], address)) {
            copy_word(current_balances[i], balance);
            return;
        }
    }
    if (num_tracked_addresses < 64) {
        copy_word(initial_balances[num_tracked_addresses], address);
        copy_word(current_balances[num_tracked_addresses], balance);
        num_tracked_addresses++;
    }
}

// ============================================================================
// Oracle Detector Implementation
// ============================================================================

__host__ __device__
OracleDetector::OracleDetector(oracle_config_t* config, bug_storage_t* storage)
    : config_(config), storage_(storage), current_tx_index_(0), current_sequence_id_(0) {
    zero_word(current_sender_);
    zero_word(current_receiver_);
}

// Remember the (sender, receiver) of the transaction being executed; the
// remaining arguments are accepted for interface symmetry but unused here.
__host__ __device__ void OracleDetector::on_transaction_start(
    const evm_word_t& sender, const evm_word_t& receiver,
    const evm_word_t& value, const uint8_t* calldata, uint32_t calldata_len) {
    copy_word(current_sender_, sender);
    copy_word(current_receiver_, receiver);
}

// File-local helpers for building fully-initialized bug records.
// FIX(review): the original left bug_location_t fields (call_depth,
// contract_id, sometimes opcode) and most bug_context_t words uninitialized
// on several paths (check_exp, on_origin, on_selfdestruct,
// check_custom_invariant); the stale values were then copied into the report.
static __host__ __device__ bug_location_t make_location(uint32_t pc, uint32_t tx_index,
                                                        uint8_t opcode, uint32_t call_depth = 0) {
    bug_location_t loc;
    loc.pc = pc;
    loc.tx_index = tx_index;
    loc.call_depth = call_depth;
    loc.contract_id = 0;
    loc.opcode = opcode;
    return loc;
}

// Returns a context with every word zeroed and no extra payload.
static __host__ __device__ bug_context_t make_context() {
    bug_context_t ctx;
    zero_word(ctx.operand1);
    zero_word(ctx.operand2);
    zero_word(ctx.result);
    zero_word(ctx.caller);
    zero_word(ctx.callee);
    zero_word(ctx.value);
    zero_word(ctx.expected);
    ctx.context_length = 0;
    return ctx;
}

// Per-instruction hook, invoked with the PRE-execution stack.
// stack[stack_size - 1] is the top of stack.
// NOTE(review): the third argument forwarded as `result` to check_add/sub/mul
// is the pre-op top of stack, not the operation's result; harmless because
// only the two operands drive detection, but confirm the intended contract.
__host__ __device__ void OracleDetector::on_instruction(
    uint32_t pc, uint8_t opcode,
    const evm_word_t* stack, uint32_t stack_size,
    execution_state_tracker_t* tracker) {

    switch (opcode) {
        case OP_ADD:
            if (stack_size >= 2 && config_->check_overflow) {
                check_add(pc, stack[stack_size - 1], stack[stack_size - 2], stack[stack_size - 1]);
            }
            break;
        case OP_SUB:
            if (stack_size >= 2 && config_->check_underflow) {
                check_sub(pc, stack[stack_size - 1], stack[stack_size - 2], stack[stack_size - 1]);
            }
            break;
        case OP_MUL:
            if (stack_size >= 2 && config_->check_overflow) {
                check_mul(pc, stack[stack_size - 1], stack[stack_size - 2], stack[stack_size - 1]);
            }
            break;
        case OP_DIV:
        case OP_SDIV:
            if (stack_size >= 2 && config_->check_div_zero) {
                check_div(pc, stack[stack_size - 1], stack[stack_size - 2]);
            }
            break;
        case OP_MOD:
        case OP_SMOD:
            if (stack_size >= 2 && config_->check_div_zero) {
                check_mod(pc, stack[stack_size - 1], stack[stack_size - 2]);
            }
            break;
        case OP_ORIGIN:
            if (config_->check_tx_origin) {
                on_origin(pc);
            }
            break;
        default:
            break;
    }
}

// Report an INTEGER_OVERFLOW when a + b wraps past 2^256.
__host__ __device__ void OracleDetector::check_add(uint32_t pc, const evm_word_t& a, const evm_word_t& b,
                                                   const evm_word_t& result) {
    if (!check_add_overflow(a, b)) return;

    bug_location_t location = make_location(pc, current_tx_index_, OP_ADD);
    bug_context_t context = make_context();
    copy_word(context.operand1, a);
    copy_word(context.operand2, b);
    copy_word(context.result, result);

    report_bug(BugType::INTEGER_OVERFLOW, BugSeverity::HIGH, location, context,
               "Integer overflow in ADD operation");
}

// Report an INTEGER_UNDERFLOW when a < b (so a - b wraps).
__host__ __device__ void OracleDetector::check_sub(uint32_t pc, const evm_word_t& a, const evm_word_t& b,
                                                   const evm_word_t& result) {
    if (!check_sub_underflow(a, b)) return;

    bug_location_t location = make_location(pc, current_tx_index_, OP_SUB);
    bug_context_t context = make_context();
    copy_word(context.operand1, a);
    copy_word(context.operand2, b);
    copy_word(context.result, result);

    report_bug(BugType::INTEGER_UNDERFLOW, BugSeverity::HIGH, location, context,
               "Integer underflow in SUB operation");
}

// Report an INTEGER_OVERFLOW when a * b may exceed 256 bits (heuristic).
__host__ __device__ void OracleDetector::check_mul(uint32_t pc, const evm_word_t& a, const evm_word_t& b,
                                                   const evm_word_t& result) {
    if (!check_mul_overflow(a, b)) return;

    bug_location_t location = make_location(pc, current_tx_index_, OP_MUL);
    bug_context_t context = make_context();
    copy_word(context.operand1, a);
    copy_word(context.operand2, b);
    copy_word(context.result, result);

    report_bug(BugType::INTEGER_OVERFLOW, BugSeverity::HIGH, location, context,
               "Integer overflow in MUL operation");
}

// Report DIVISION_BY_ZERO when the divisor is zero (EVM yields 0 silently).
__host__ __device__ void OracleDetector::check_div(uint32_t pc, const evm_word_t& a, const evm_word_t& b) {
    if (!is_zero(b)) return;

    bug_location_t location = make_location(pc, current_tx_index_, OP_DIV);
    bug_context_t context = make_context();
    copy_word(context.operand1, a);
    copy_word(context.operand2, b);

    report_bug(BugType::DIVISION_BY_ZERO, BugSeverity::MEDIUM, location, context,
               "Division by zero");
}

// Report MODULO_BY_ZERO when the modulus is zero.
__host__ __device__ void OracleDetector::check_mod(uint32_t pc, const evm_word_t& a, const evm_word_t& b) {
    if (!is_zero(b)) return;

    bug_location_t location = make_location(pc, current_tx_index_, OP_MOD);
    bug_context_t context = make_context();
    copy_word(context.operand1, a);
    copy_word(context.operand2, b);

    report_bug(BugType::MODULO_BY_ZERO, BugSeverity::MEDIUM, location, context,
               "Modulo by zero");
}

// Heuristic EXP overflow check: base > 1 and low-limb exponent > 255.
__host__ __device__ void OracleDetector::check_exp(uint32_t pc, const evm_word_t& base, const evm_word_t& exp,
                                                   const evm_word_t& result) {
    if (is_zero(base) || is_zero(exp)) return;

    // base > 1 iff its most significant non-zero limb is above index 0,
    // or limb 0 itself exceeds 1.
    bool base_gt_1 = false;
    for (int i = 7; i >= 0; i--) {
        if (base._limbs[i] > 0) {
            if (base._limbs[i] > 1 || i > 0) {
                base_gt_1 = true;
            }
            break;
        }
    }
    if (base_gt_1 && exp._limbs[0] > 255) {
        bug_location_t location = make_location(pc, current_tx_index_, OP_EXP);
        bug_context_t context = make_context();
        copy_word(context.operand1, base);
        copy_word(context.operand2, exp);
        copy_word(context.result, result);

        report_bug(BugType::EXPONENT_OVERFLOW, BugSeverity::MEDIUM, location, context,
                   "Potential overflow in EXP operation");
    }
}

// SLOAD hook — placeholder; storage reads are not yet tracked.
__host__ __device__ void OracleDetector::on_sload(uint32_t pc, const evm_word_t& slot, const evm_word_t& value,
                                                  execution_state_tracker_t* tracker) {
    // Track storage reads for reentrancy detection (not implemented yet).
}

// SSTORE hook — record the write for later reentrancy analysis.
__host__ __device__ void OracleDetector::on_sstore(uint32_t pc, const evm_word_t& slot,
                                                   const evm_word_t& old_value, const evm_word_t& new_value,
                                                   execution_state_tracker_t* tracker) {
    if (!tracker) return;

    storage_write_t write;
    copy_word(write.slot, slot);
    copy_word(write.old_value, old_value);
    copy_word(write.new_value, new_value);
    write.pc = pc;
    write.call_depth = tracker->call_depth;
    tracker->record_storage_write(write);
}

// CALL-family hook: push the frame and flag the checks-effects-interactions
// violation (state modified before an external call).
__host__ __device__ void OracleDetector::on_call_start(uint32_t pc, uint8_t opcode,
                                                       const evm_word_t& target, const evm_word_t& value,
                                                       const evm_word_t& gas,
                                                       execution_state_tracker_t* tracker) {
    if (!config_->check_reentrancy || !tracker) return;

    call_frame_t frame;
    copy_word(frame.caller, current_sender_);
    copy_word(frame.callee, target);
    copy_word(frame.value, value);
    frame.pc = pc;
    frame.opcode = opcode;
    frame.has_state_change = tracker->num_storage_writes > 0;
    frame.is_external = !is_reentrancy_safe_call(opcode, target);

    tracker->push_call(frame);

    if (frame.is_external && frame.has_state_change) {
        // State was modified before an external call — potential reentrancy.
        bug_location_t location = make_location(pc, current_tx_index_, opcode, tracker->call_depth);
        bug_context_t context = make_context();
        copy_word(context.callee, target);
        copy_word(context.value, value);

        report_bug(BugType::REENTRANCY_ETH, BugSeverity::CRITICAL, location, context,
                   "Potential reentrancy: state modified before external call");
    }
}

// CALL-family return hook: record the outcome and pop the frame.
// The return value is marked unchecked; consumption would be detected by a
// later comparison (not implemented here).
__host__ __device__ void OracleDetector::on_call_end(uint32_t pc, bool success, const uint8_t* return_data,
                                                     uint32_t return_size, execution_state_tracker_t* tracker) {
    if (tracker) {
        tracker->last_call_success = success;
        tracker->last_call_checked = false;
        tracker->pop_call();
    }
}

// Balance-change hook — placeholder for ether-leak tracking.
__host__ __device__ void OracleDetector::on_balance_change(const evm_word_t& address,
                                                           const evm_word_t& old_balance,
                                                           const evm_word_t& new_balance) {
    // Track for ether leak detection (not implemented yet).
}

// SELFDESTRUCT hook: flag a destruct that forwards a non-zero balance.
__host__ __device__ void OracleDetector::on_selfdestruct(uint32_t pc, const evm_word_t& beneficiary,
                                                         const evm_word_t& balance) {
    if (!config_->check_selfdestruct) return;
    if (is_zero(balance)) return;

    bug_location_t location = make_location(pc, current_tx_index_, OP_SELFDESTRUCT);
    bug_context_t context = make_context();
    copy_word(context.callee, beneficiary);
    copy_word(context.value, balance);

    report_bug(BugType::SELFDESTRUCT_ETH_LEAK, BugSeverity::HIGH, location, context,
               "SELFDESTRUCT with ETH balance");
}

// CREATE/CREATE2 hook — placeholder.
__host__ __device__ void OracleDetector::on_create(uint32_t pc, const evm_word_t& value,
                                                   const evm_word_t& new_address) {
    // Track contract creation (not implemented yet).
}

// ORIGIN hook: any use of tx.origin is reported as an auth anti-pattern.
__host__ __device__ void OracleDetector::on_origin(uint32_t pc) {
    if (!config_->check_tx_origin) return;

    bug_location_t location = make_location(pc, current_tx_index_, OP_ORIGIN);
    bug_context_t context = make_context();

    report_bug(BugType::TX_ORIGIN_AUTH, BugSeverity::MEDIUM, location, context,
               "tx.origin used (potential phishing vulnerability)");
}

// End-of-transaction hook: advance the transaction counter.
__host__ __device__ void OracleDetector::on_transaction_end(
    bool success, const uint8_t* return_data, uint32_t return_size,
    uint64_t gas_used, execution_state_tracker_t* tracker) {
    current_tx_index_++;
}

// Report an INVARIANT_VIOLATION when a user-supplied condition is false.
__host__ __device__ void OracleDetector::check_custom_invariant(uint32_t invariant_id, bool condition,
                                                                const char* description) {
    if (condition) return;

    bug_location_t location = make_location(0, current_tx_index_, 0);
    bug_context_t context = make_context();

    report_bug(BugType::INVARIANT_VIOLATION, BugSeverity::HIGH, location, context, description);
}
// File-local helpers: fully initialize bug records before use.
// FIX(review): several reporting sites below left bug_location_t fields
// (opcode, contract_id, call_depth) and most bug_context_t words
// uninitialized; stale values were then stored in the report.
static __host__ __device__ void init_location(bug_location_t& loc, uint32_t pc, uint32_t tx_index,
                                              uint8_t opcode = 0, uint32_t call_depth = 0) {
    loc.pc = pc;
    loc.tx_index = tx_index;
    loc.call_depth = call_depth;
    loc.contract_id = 0;
    loc.opcode = opcode;
}

static __host__ __device__ void init_context(bug_context_t& ctx) {
    zero_word(ctx.operand1);
    zero_word(ctx.operand2);
    zero_word(ctx.result);
    zero_word(ctx.caller);
    zero_word(ctx.callee);
    zero_word(ctx.value);
    zero_word(ctx.expected);
    ctx.context_length = 0;
}

// Build a detected_bug_t and hand it to storage (which dedups and caps).
// Bugs below the configured severity floor are dropped.
__host__ __device__ void OracleDetector::report_bug(BugType type, BugSeverity severity,
                                                    const bug_location_t& location,
                                                    const bug_context_t& context,
                                                    const char* description) {
    if ((int)severity < (int)config_->min_severity) return;

    detected_bug_t bug;
    bug.type = type;
    bug.severity = severity;
    bug.location = location;
    bug.context = context;
    bug.timestamp = 0;  // Would use real timestamp in production
    bug.input_hash = hash_word(context.operand1);
    bug.sequence_id = current_sequence_id_;
    bug.confirmed = false;

    // FIX(review): the original wrote the terminator only inside the copy
    // loop, so an empty description left bug.description uninitialized.
    // Copy up to 255 chars, then always NUL-terminate.
    int i = 0;
    for (; i < 255 && description[i]; i++) {
        bug.description[i] = description[i];
    }
    bug.description[i] = '\0';

    storage_->add_bug(bug);
}

// Stable dedup signature: type in the top byte, pc in the middle, value hash.
__host__ __device__ uint64_t OracleDetector::compute_bug_signature(BugType type, uint32_t pc,
                                                                   const evm_word_t& key_value) {
    return ((uint64_t)type << 56) ^ ((uint64_t)pc << 32) ^ hash_word(key_value);
}

// Static severity mapping per bug class.
__host__ __device__ BugSeverity OracleDetector::determine_severity(BugType type, const bug_context_t& context) {
    switch (type) {
        case BugType::REENTRANCY_ETH:
        case BugType::UNAUTHORIZED_SELFDESTRUCT:
            return BugSeverity::CRITICAL;
        case BugType::INTEGER_OVERFLOW:
        case BugType::INTEGER_UNDERFLOW:
        case BugType::ETHER_LEAK:
            return BugSeverity::HIGH;
        case BugType::TX_ORIGIN_AUTH:
        case BugType::DIVISION_BY_ZERO:
            return BugSeverity::MEDIUM;
        default:
            return BugSeverity::LOW;
    }
}

// A call is reentrancy-safe if it cannot re-enter us with state effects:
// STATICCALL, or a call to precompile addresses 0x1..0x9.
__host__ __device__ bool OracleDetector::is_reentrancy_safe_call(uint8_t opcode, const evm_word_t& target) {
    if (opcode == OP_STATICCALL) return true;

    // Precompile: all upper limbs zero and low limb in [1, 9].
    bool is_precompile = true;
    for (int i = 1; i < 8; i++) {
        if (target._limbs[i] != 0) {
            is_precompile = false;
            break;
        }
    }
    if (is_precompile && target._limbs[0] >= 1 && target._limbs[0] <= 9) {
        return true;
    }

    return false;
}

// Heuristic: any zero<->non-zero transition on a slot looks like entering or
// exiting a mutex-style reentrancy guard.
__host__ __device__ bool OracleDetector::is_reentrancy_guard_pattern(
    const evm_word_t& slot, const evm_word_t& old_value, const evm_word_t& new_value) {
    if (is_zero(old_value) && !is_zero(new_value)) {
        return true;  // Entering critical section
    }
    if (!is_zero(old_value) && is_zero(new_value)) {
        return true;  // Exiting critical section
    }
    return false;
}

// Exact 256-bit add-overflow test: ripple the carry through all limbs; a
// carry out of the top limb means a + b wrapped.
__host__ __device__ bool OracleDetector::check_add_overflow(const evm_word_t& a, const evm_word_t& b) {
    uint64_t carry = 0;
    for (int i = 0; i < 8; i++) {
        uint64_t sum = (uint64_t)a._limbs[i] + (uint64_t)b._limbs[i] + carry;
        carry = sum >> 32;
    }
    return carry > 0;
}

// Heuristic mul-overflow test based on the highest non-zero limb indices.
// With 32-bit limbs, a_high + b_high >= 8 always overflows and == 7 may
// overflow; flagging >= 7 is deliberately conservative (false positives,
// no false negatives). An exact test would need 512-bit multiplication.
__host__ __device__ bool OracleDetector::check_mul_overflow(const evm_word_t& a, const evm_word_t& b) {
    int a_high = -1, b_high = -1;
    for (int i = 7; i >= 0; i--) {
        if (a._limbs[i] != 0 && a_high < 0) a_high = i;
        if (b._limbs[i] != 0 && b_high < 0) b_high = i;
    }
    if (a_high >= 0 && b_high >= 0 && a_high + b_high >= 7) {
        return true;
    }
    return false;
}

// Unsigned subtraction underflows exactly when a < b.
__host__ __device__ bool OracleDetector::check_sub_underflow(const evm_word_t& a, const evm_word_t& b) {
    return less_than(a, b);
}

// ============================================================================
// Specialized Oracle Implementations
// ============================================================================

__host__ __device__ ArithmeticOracle::ArithmeticOracle(oracle_config_t* config, bug_storage_t* storage)
    : OracleDetector(config, storage) {}

// Thin wrappers so instrumented SafeMath-style call sites share the base checks.
__host__ __device__ void ArithmeticOracle::verify_safe_add(uint32_t pc, const evm_word_t& a,
                                                           const evm_word_t& b, const evm_word_t& result) {
    check_add(pc, a, b, result);
}

__host__ __device__ void ArithmeticOracle::verify_safe_sub(uint32_t pc, const evm_word_t& a,
                                                           const evm_word_t& b, const evm_word_t& result) {
    check_sub(pc, a, b, result);
}

__host__ __device__ void ArithmeticOracle::verify_safe_mul(uint32_t pc, const evm_word_t& a,
                                                           const evm_word_t& b, const evm_word_t& result) {
    check_mul(pc, a, b, result);
}

__host__ __device__ ReentrancyOracle::ReentrancyOracle(oracle_config_t* config, bug_storage_t* storage)
    : OracleDetector(config, storage), has_reentrancy_guard_(false) {
    zero_word(guard_slot_);
}

// Re-run the reentrancy heuristic whenever an external call is observed.
__host__ __device__ void ReentrancyOracle::track_external_call(uint32_t pc, const evm_word_t& target,
                                                               execution_state_tracker_t* tracker) {
    check_reentrancy_pattern(tracker);
}

// Mark that contract state changed before any upcoming external call.
__host__ __device__ void ReentrancyOracle::track_state_modification(uint32_t pc, const evm_word_t& slot,
                                                                    execution_state_tracker_t* tracker) {
    if (tracker) {
        tracker->state_modified_before_call = true;
    }
}

// Report when the tracker's state-before-external-call heuristic fires.
__host__ __device__ void ReentrancyOracle::check_reentrancy_pattern(execution_state_tracker_t* tracker) {
    if (!tracker || !config_->check_reentrancy) return;
    if (!tracker->check_reentrancy()) return;

    bug_location_t location;
    init_location(location, 0, current_tx_index_, 0, tracker->call_depth);
    bug_context_t context;
    init_context(context);

    report_bug(BugType::REENTRANCY_ETH, BugSeverity::CRITICAL, location, context,
               "Reentrancy detected: state modified before and during external call");
}

__host__ __device__ AccessControlOracle::AccessControlOracle(oracle_config_t* config, bug_storage_t* storage)
    : OracleDetector(config, storage), authorization_checked_(false), num_authorized_(0) {}

// Flag privileged operations executed without a preceding auth check.
__host__ __device__ void AccessControlOracle::on_privileged_operation(uint32_t pc, uint8_t opcode,
                                                                      const evm_word_t& sender) {
    if (!config_->check_unauthorized_access) return;
    if (authorization_checked_) return;

    bug_location_t location;
    init_location(location, pc, current_tx_index_, opcode);
    bug_context_t context;
    init_context(context);
    copy_word(context.caller, sender);

    report_bug(BugType::MISSING_ACCESS_CONTROL, BugSeverity::HIGH, location, context,
               "Privileged operation without authorization check");
}

// Record that an auth check happened (and which address it checked, up to 16).
__host__ __device__ void AccessControlOracle::on_authorization_check(uint32_t pc,
                                                                     const evm_word_t& checked_address) {
    authorization_checked_ = true;
    if (num_authorized_ < 16) {
        copy_word(authorized_addresses_[num_authorized_++], checked_address);
    }
}

__host__ __device__ void AccessControlOracle::verify_access_control(uint32_t pc, uint8_t operation) {
    on_privileged_operation(pc, operation, current_sender_);
}

__host__ __device__ TokenOracle::TokenOracle(oracle_config_t* config, bug_storage_t* storage)
    : OracleDetector(config, storage), total_supply_slot_(0) {
    zero_word(tracked_total_supply_);
}

// ERC-20 transfer check: flag transfers to the zero address.
__host__ __device__ void TokenOracle::check_transfer(uint32_t pc, const evm_word_t& from,
                                                     const evm_word_t& to, const evm_word_t& amount) {
    if (!config_->check_erc20_issues) return;
    if (!is_zero(to)) return;

    bug_location_t location;
    init_location(location, pc, current_tx_index_);
    bug_context_t context;
    init_context(context);
    copy_word(context.operand1, from);
    copy_word(context.operand2, to);
    copy_word(context.result, amount);

    report_bug(BugType::ERC20_TRANSFER_TO_ZERO, BugSeverity::MEDIUM, location, context,
               "Token transfer to zero address");
}

// Approve check — placeholder: the non-zero->non-zero race would need the
// previous allowance, which is not tracked yet.
__host__ __device__ void TokenOracle::check_approve(uint32_t pc, const evm_word_t& owner,
                                                    const evm_word_t& spender, const evm_word_t& amount) {
    // Check for approval race condition (non-zero to non-zero)
    // Would need to track previous allowance
}

// transferFrom check: flag transfers exceeding the current allowance.
// NOTE(review): the reported type ERC20_BURN_WITHOUT_APPROVAL does not match
// the message ("exceeds allowance") — confirm the intended BugType.
__host__ __device__ void TokenOracle::check_transferFrom(uint32_t pc, const evm_word_t& from,
                                                         const evm_word_t& to, const evm_word_t& amount,
                                                         const evm_word_t& allowance) {
    if (!config_->check_erc20_issues) return;
    if (!greater_than(amount, allowance)) return;

    bug_location_t location;
    init_location(location, pc, current_tx_index_);
    bug_context_t context;
    init_context(context);
    copy_word(context.operand1, amount);
    copy_word(context.operand2, allowance);

    report_bug(BugType::ERC20_BURN_WITHOUT_APPROVAL, BugSeverity::HIGH, location, context,
               "Transfer amount exceeds allowance");
}

// Balance-change hook — placeholder for supply-consistency tracking.
__host__ __device__ void TokenOracle::track_balance_change(const evm_word_t& address,
                                                           const evm_word_t& old_balance,
                                                           const evm_word_t& new_balance) {
    // Track for total supply consistency checking (not implemented yet).
}

// Placeholder: sum-of-balances == totalSupply invariant.
__host__ __device__ void TokenOracle::check_total_supply_consistency() {
    // Check that sum of balances equals total supply (not implemented yet).
}

__host__ __device__ FundSafetyOracle::FundSafetyOracle(oracle_config_t* config, bug_storage_t* storage)
    : OracleDetector(config, storage), has_withdrawal_function_(false) {
    zero_word(total_eth_received_);
    zero_word(total_eth_sent_);
}

// Accumulate incoming ETH (256-bit ripple-carry addition; wraps silently).
__host__ __device__ void FundSafetyOracle::on_eth_received(const evm_word_t& from, const evm_word_t& amount) {
    uint64_t carry = 0;
    for (int i = 0; i < 8; i++) {
        uint64_t sum = (uint64_t)total_eth_received_._limbs[i] + (uint64_t)amount._limbs[i] + carry;
        total_eth_received_._limbs[i] = (uint32_t)sum;
        carry = sum >> 32;
    }
}

// Accumulate outgoing ETH and flag when cumulative sent exceeds received.
__host__ __device__ void FundSafetyOracle::on_eth_sent(uint32_t pc, const evm_word_t& to,
                                                       const evm_word_t& amount) {
    if (!config_->check_ether_leak) return;

    uint64_t carry = 0;
    for (int i = 0; i < 8; i++) {
        uint64_t sum = (uint64_t)total_eth_sent_._limbs[i] + (uint64_t)amount._limbs[i] + carry;
        total_eth_sent_._limbs[i] = (uint32_t)sum;
        carry = sum >> 32;
    }

    if (greater_than(total_eth_sent_, total_eth_received_)) {
        bug_location_t location;
        init_location(location, pc, current_tx_index_);
        bug_context_t context;
        init_context(context);
        copy_word(context.operand1, total_eth_received_);
        copy_word(context.operand2, total_eth_sent_);
        copy_word(context.callee, to);
        copy_word(context.value, amount);

        report_bug(BugType::ETHER_LEAK, BugSeverity::HIGH, location, context,
                   "More ETH sent than received");
    }
}

// Flag a non-zero contract balance when no withdrawal path was observed.
__host__ __device__ void FundSafetyOracle::check_stuck_ether(const evm_word_t& contract_balance) {
    if (!config_->check_stuck_ether) return;
    if (is_zero(contract_balance) || has_withdrawal_function_) return;

    bug_location_t location;
    init_location(location, 0, current_tx_index_);
    bug_context_t context;
    init_context(context);
    copy_word(context.value, contract_balance);

    report_bug(BugType::STUCK_ETHER, BugSeverity::MEDIUM, location, context,
               "Contract has ETH balance but no withdrawal function detected");
}

// Flag a mismatch between the contract's expected and actual balance
// (possible force-feeding via SELFDESTRUCT / coinbase).
__host__ __device__ void FundSafetyOracle::check_unexpected_eth(const evm_word_t& expected,
                                                                const evm_word_t& actual) {
    if (!config_->check_force_feed) return;
    if (equals(expected, actual)) return;

    bug_location_t location;
    init_location(location, 0, current_tx_index_);
    bug_context_t context;
    init_context(context);
    copy_word(context.expected, expected);
    copy_word(context.result, actual);

    report_bug(BugType::UNEXPECTED_ETH_BALANCE, BugSeverity::MEDIUM, location, context,
               "Unexpected ETH balance (possible force-feeding)");
}

// SELFDESTRUCT safety is reported by the base class's on_selfdestruct().
__host__ __device__ void FundSafetyOracle::check_selfdestruct_safety(uint32_t pc,
                                                                     const evm_word_t& beneficiary) {
    // Already handled in base class on_selfdestruct
}

__host__ __device__ GasOracle::GasOracle(oracle_config_t* config, bug_storage_t* storage)
    : OracleDetector(config, storage), max_gas_observed_(0), num_loops_(0) {}

// Track the maximum gas consumed across observed executions.
__host__ __device__ void GasOracle::on_gas_usage(uint32_t pc, uint64_t gas_used, uint64_t gas_remaining) {
    if (gas_used > max_gas_observed_) {
        max_gas_observed_ = gas_used;
    }
}

// Track per-pc loop iteration counts (up to 64 loops) and flag loops that
// exceed 1000 iterations as potentially unbounded.
__host__ __device__ void GasOracle::check_unbounded_loop(uint32_t pc, uint32_t iteration_count) {
    if (!config_->check_gas_issues) return;

    // Find or create the loop entry for this pc.
    int loop_idx = -1;
    for (uint32_t i = 0; i < num_loops_; i++) {
        if (loop_pcs_[i] == pc) {
            loop_idx = i;
            break;
        }
    }
    if (loop_idx < 0 && num_loops_ < 64) {
        loop_idx = num_loops_++;
        loop_pcs_[loop_idx] = pc;
        loop_iteration_counts_[loop_idx] = 0;
    }
    if (loop_idx < 0) return;  // table full: drop silently

    loop_iteration_counts_[loop_idx] = iteration_count;

    if (iteration_count > 1000) {
        bug_location_t location;
        init_location(location, pc, current_tx_index_);
        bug_context_t context;
        init_context(context);
        context.operand1._limbs[0] = iteration_count;

        report_bug(BugType::UNBOUNDED_LOOP, BugSeverity::MEDIUM, location, context,
                   "Potentially unbounded loop detected");
    }
}

// Flag transactions whose total gas exceeds the ~30M mainnet block limit.
__host__ __device__ void GasOracle::check_block_gas_limit(uint64_t total_gas) {
    if (!config_->check_gas_issues) return;
    if (total_gas <= 30000000) return;

    bug_location_t location;
    init_location(location, 0, current_tx_index_);
    bug_context_t context;
    init_context(context);
    context.operand1._limbs[0] = (uint32_t)(total_gas & 0xFFFFFFFF);
    context.operand1._limbs[1] = (uint32_t)(total_gas >> 32);

    report_bug(BugType::BLOCK_GAS_LIMIT, BugSeverity::HIGH, location, context,
               "Transaction exceeds block gas limit");
}

// Placeholder: 1/64th-rule / low-gas forwarding check.
__host__ __device__ void GasOracle::check_call_gas(uint32_t pc, uint64_t gas_forwarded) {
    // Check if 1/64th rule is violated or gas is unexpectedly low (not implemented yet).
}
+// ============================================================================ +// Composite Oracle Implementation +// ============================================================================ + +__host__ __device__ CompositeOracle::CompositeOracle(oracle_config_t* config, bug_storage_t* storage) + : config_(config), storage_(storage), + arithmetic_(config, storage), + reentrancy_(config, storage), + access_control_(config, storage), + token_(config, storage), + fund_safety_(config, storage), + gas_(config, storage) {} + +__host__ __device__ void CompositeOracle::init() { + storage_->init(); +} + +__host__ __device__ void CompositeOracle::on_transaction_start( + const evm_word_t& sender, const evm_word_t& receiver, + const evm_word_t& value, const uint8_t* calldata, uint32_t calldata_len) { + + arithmetic_.on_transaction_start(sender, receiver, value, calldata, calldata_len); + reentrancy_.on_transaction_start(sender, receiver, value, calldata, calldata_len); + access_control_.on_transaction_start(sender, receiver, value, calldata, calldata_len); + token_.on_transaction_start(sender, receiver, value, calldata, calldata_len); + fund_safety_.on_transaction_start(sender, receiver, value, calldata, calldata_len); + gas_.on_transaction_start(sender, receiver, value, calldata, calldata_len); +} + +__host__ __device__ void CompositeOracle::on_instruction( + uint32_t pc, uint8_t opcode, + const evm_word_t* stack, uint32_t stack_size, + execution_state_tracker_t* tracker) { + + arithmetic_.on_instruction(pc, opcode, stack, stack_size, tracker); + // Other oracles hook into specific opcodes via their own mechanisms +} + +__host__ __device__ void CompositeOracle::on_transaction_end( + bool success, const uint8_t* return_data, uint32_t return_size, + uint64_t gas_used, execution_state_tracker_t* tracker) { + + arithmetic_.on_transaction_end(success, return_data, return_size, gas_used, tracker); + reentrancy_.on_transaction_end(success, return_data, return_size, gas_used, 
tracker); + gas_.on_transaction_end(success, return_data, return_size, gas_used, tracker); +} + +// ============================================================================ +// CUDA Kernel Implementations +// ============================================================================ + +__global__ void kernel_check_arithmetic( + uint8_t opcode, + const evm_word_t* operands_a, + const evm_word_t* operands_b, + const evm_word_t* results, + uint32_t* pcs, + uint32_t num_operations, + bug_storage_t* bug_storage, + oracle_config_t* config) { + + uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_operations) return; + + ArithmeticOracle oracle(config, bug_storage); + + switch (opcode) { + case OP_ADD: + oracle.verify_safe_add(pcs[idx], operands_a[idx], operands_b[idx], results[idx]); + break; + case OP_SUB: + oracle.verify_safe_sub(pcs[idx], operands_a[idx], operands_b[idx], results[idx]); + break; + case OP_MUL: + oracle.verify_safe_mul(pcs[idx], operands_a[idx], operands_b[idx], results[idx]); + break; + } +} + +__global__ void kernel_check_reentrancy( + execution_state_tracker_t* trackers, + uint32_t num_instances, + bug_storage_t* bug_storage, + oracle_config_t* config) { + + uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_instances) return; + + ReentrancyOracle oracle(config, bug_storage); + oracle.check_reentrancy_pattern(&trackers[idx]); +} + +__global__ void kernel_check_invariants( + const evm_word_t* pre_state, + const evm_word_t* post_state, + const uint32_t* invariant_types, + uint32_t num_invariants, + bug_storage_t* bug_storage) { + + uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_invariants) return; + + // Check specific invariant based on type + uint32_t type = invariant_types[idx]; + + bool violated = false; + switch (type) { + case 0: // EQUALS + violated = !equals(pre_state[idx], post_state[idx]); + break; + case 1: // NOT_LESS_THAN + violated = less_than(post_state[idx], 
pre_state[idx]); + break; + case 2: // NOT_GREATER_THAN + violated = greater_than(post_state[idx], pre_state[idx]); + break; + case 3: // NON_ZERO + violated = is_zero(post_state[idx]); + break; + } + + if (violated) { + detected_bug_t bug; + bug.type = BugType::INVARIANT_VIOLATION; + bug.severity = BugSeverity::HIGH; + bug.location.pc = 0; + bug.location.tx_index = 0; + copy_word(bug.context.expected, pre_state[idx]); + copy_word(bug.context.result, post_state[idx]); + bug_storage->add_bug(bug); + } +} + +// ============================================================================ +// Host Helper Functions +// ============================================================================ + +__host__ oracle_config_t* allocate_oracle_config() { + oracle_config_t* config; + cudaMallocManaged(&config, sizeof(oracle_config_t)); + config->set_default(); + return config; +} + +__host__ bug_storage_t* allocate_bug_storage() { + bug_storage_t* storage; + cudaMallocManaged(&storage, sizeof(bug_storage_t)); + storage->init(); + return storage; +} + +__host__ execution_state_tracker_t* allocate_trackers(uint32_t num_instances) { + execution_state_tracker_t* trackers; + cudaMallocManaged(&trackers, num_instances * sizeof(execution_state_tracker_t)); + for (uint32_t i = 0; i < num_instances; i++) { + trackers[i].init(); + } + return trackers; +} + +__host__ void free_oracle_config(oracle_config_t* config) { + if (config) cudaFree(config); +} + +__host__ void free_bug_storage(bug_storage_t* storage) { + if (storage) cudaFree(storage); +} + +__host__ void free_trackers(execution_state_tracker_t* trackers) { + if (trackers) cudaFree(trackers); +} + +__host__ void copy_bugs_to_host(detected_bug_t* host_bugs, const bug_storage_t* device_storage) { + cudaMemcpy(host_bugs, device_storage->bugs, + device_storage->bug_count * sizeof(detected_bug_t), + cudaMemcpyDeviceToHost); +} + +__host__ void print_bug_report(const bug_storage_t* storage) { + printf("\n========== BUG REPORT 
==========\n"); + printf("Total bugs found: %u\n\n", storage->bug_count); + + const char* severity_names[] = {"INFO", "LOW", "MEDIUM", "HIGH", "CRITICAL"}; + const char* type_names[] = { + "INTEGER_OVERFLOW", "INTEGER_UNDERFLOW", "DIVISION_BY_ZERO", "MODULO_BY_ZERO", + "EXPONENT_OVERFLOW", "", "", "", "", "", + "UNAUTHORIZED_CALL", "UNAUTHORIZED_SELFDESTRUCT", "UNAUTHORIZED_DELEGATECALL", + "TX_ORIGIN_AUTH", "MISSING_ACCESS_CONTROL", "", "", "", "", "", + "REENTRANCY_ETH", "REENTRANCY_ERC20", "REENTRANCY_CROSS_FUNCTION", + "REENTRANCY_CROSS_CONTRACT", "READ_ONLY_REENTRANCY" + }; + + for (uint32_t i = 0; i < storage->bug_count; i++) { + const detected_bug_t& bug = storage->bugs[i]; + printf("Bug #%u:\n", i + 1); + printf(" Type: %s\n", ((int)bug.type < 25) ? type_names[(int)bug.type] : "UNKNOWN"); + printf(" Severity: %s\n", severity_names[(int)bug.severity]); + printf(" PC: %u\n", bug.location.pc); + printf(" TX Index: %u\n", bug.location.tx_index); + printf(" Description: %s\n", bug.description); + printf("\n"); + } +} + +__host__ void export_bugs_json(const bug_storage_t* storage, const char* filename) { + FILE* f = fopen(filename, "w"); + if (!f) return; + + fprintf(f, "{\n \"bug_count\": %u,\n \"bugs\": [\n", storage->bug_count); + + for (uint32_t i = 0; i < storage->bug_count; i++) { + const detected_bug_t& bug = storage->bugs[i]; + fprintf(f, " {\n"); + fprintf(f, " \"type\": %u,\n", (unsigned)bug.type); + fprintf(f, " \"severity\": %u,\n", (unsigned)bug.severity); + fprintf(f, " \"pc\": %u,\n", bug.location.pc); + fprintf(f, " \"tx_index\": %u,\n", bug.location.tx_index); + fprintf(f, " \"description\": \"%s\"\n", bug.description); + fprintf(f, " }%s\n", (i < storage->bug_count - 1) ? 
"," : ""); + } + + fprintf(f, " ]\n}\n"); + fclose(f); +} + +} // namespace fuzzing +} // namespace CuEVM diff --git a/Dockerfile.ngc b/Dockerfile.ngc new file mode 100644 index 0000000..52dfe34 --- /dev/null +++ b/Dockerfile.ngc @@ -0,0 +1,14 @@ +FROM nvcr.io/nvidia/pytorch:25.12-py3 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + git \ + libgmp-dev \ + libcjson-dev \ + ninja-build \ + && rm -rf /var/lib/apt/lists/* + +RUN python3 -m ensurepip --upgrade \ + && python3 -m pip install --no-cache-dir --upgrade cmake==4.2.1 + +WORKDIR /workspaces/CuEVM diff --git a/README.md b/README.md index 31e0bae..08a5bea 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,10 @@ Cuda implementation of EVM bytecode executor ## Prerequisites -- CUDA Toolkit (Version 12.0+, because we use `--std c++20`) -- A CUDA-capable GPU (CUDA compute capabilily 7+ other older GPUs compability are not tested fully) -- A C++ compiler compatible with the CUDA Toolkit (gcc/g++ version 10+) +- CUDA Toolkit 13.1 Update 1+ (C++20 support, SM 103 for NVIDIA B300) +- CMake 4.2.1+ (install from Kitware or `python3 -m pip install --user cmake==4.2.1`) +- A CUDA-capable GPU (CUDA compute capability 10.3/SM 103 for B300; use 103-real;103-virtual for SASS + PTX) +- A C++ compiler compatible with the CUDA Toolkit (GCC 11/12 or Clang 16) - For docker image, you don't need the above but the system with docker installed ## Compile and Build binary @@ -17,11 +18,11 @@ There are two methods, one requires installing all prequisited in the system, th Building on Ubuntu (with sudo): * Setup required libraries: `sudo apt install libgmp-dev` * Setup cJSON: `sudo apt install libcjson-dev` -* Use cmake to build the binary (Adjust `-DCUDA_COMPUTE_CAPABILITY=86` according to your GPU compute capability number): +* Use cmake to build the binary (Adjust `-DCUDA_COMPUTE_CAPABILITY="103-real;103-virtual"` according to your GPU compute capability number): ``` bash cmake -S . 
-B build -DTESTS=OFF -DGPU=ON -DCPU=OFF \ - -DCUDA_COMPUTE_CAPABILITY=86 + -DCUDA_COMPUTE_CAPABILITY="103-real;103-virtual" -DENABLE_EIP_3155_OPTIONAL=OFF \ -DENABLE_EIP_3155=ON \ -DENABLE_PAIRING_CODE=ON @@ -33,12 +34,16 @@ Building without sudo is also possible with extra configuration and modification #### Building using docker image -* Pull the docker image first: `docker pull augustus/goevmlab-cuevm:20241008` -* Run and mount the current code folder `docker run -it -v $(pwd):/workspaces/CuEVM augustus/goevmlab-cuevm:20241008` -* Inside the docker container, you can build the code using the same commands as above (Adjust `-DCUDA_COMPUTE_CAPABILITY=86` according to your GPU compute capability number): +* Recommended production baseline: NVIDIA NGC PyTorch 25.12 (CUDA 13.1, Ubuntu 24.04) +* Build the thin CuEVM image from the curated base: +``` bash +docker build -f Dockerfile.ngc -t cuevm-ngc . +``` +* Run and mount the current code folder `docker run --gpus all -it -v $(pwd):/workspaces/CuEVM cuevm-ngc` +* Inside the docker container, you can build the code using the same commands as above (Adjust `-DCUDA_COMPUTE_CAPABILITY="103-real;103-virtual"` according to your GPU compute capability number): ``` bash cmake -S . -B build -DTESTS=OFF -DGPU=ON -DCPU=OFF \ - -DCUDA_COMPUTE_CAPABILITY=86 \ + -DCUDA_COMPUTE_CAPABILITY="103-real;103-virtual" \ -DENABLE_EIP_3155_OPTIONAL=OFF \ -DENABLE_EIP_3155=ON \ -DENABLE_PAIRING_CODE=ON @@ -87,15 +92,19 @@ The execution trace and output state will be printed to the stdout, you can use [Run Google Colab demo using free GPU](https://colab.research.google.com/drive/1W_3zKOJR2Jpv_6SoM0cmOFgVHP2b7rny?usp=sharing) +## Fork correctness and differential validation + +CuEVM currently supports fork rules up to Cancun (set `-DEVM_VERSION=CANCUN` when needed). Osaka/Fusaka execution rules are not yet implemented. 
For post-Fusaka or mainnet-like fuzzing, treat GPU results as throughput candidates and re-run on a CPU reference EVM (geth/revm) with EIP-3155 traces enabled to confirm correctness. + ## Testing using ethtest The script `scripts/run-ethtest-by-fork` can be used to run the tests from the -[ethereum/tests](https://github.com/ethereum/tests/tree/shanghai/GeneralStateTests). It +[ethereum/tests](https://github.com/ethereum/tests/tree/shanghai/GeneralStateTests) branch that matches the selected fork (examples below use Shanghai). It compares the traces from the outputs of CuEVM and `geth` without stateRoot. Requirements: -- Shanghai branch of [ethereum/tests](https://github.com/ethereum/tests/tree/shanghai/GeneralStateTests) +- A matching `ethereum/tests` fork branch (e.g. Shanghai or Cancun) - [goevmlab with CuEVM driver](https://github.com/cassc/goevmlab/tree/add-cuevm) The following will run all the tests in `ethereum/tests/GeneralStateTests`, note that this may take a few hours: diff --git a/fuzzing/fuzzer.py b/fuzzing/fuzzer.py index 74416ee..61633d6 100644 --- a/fuzzing/fuzzer.py +++ b/fuzzing/fuzzer.py @@ -85,7 +85,7 @@ def __init__(self, contract_source, num_instances=2, timeout=10, \ self.ast_parser = self.library.ast_parser self.contract_name = self.library.contract_name self.timeout = timeout # in seconds - self.parse_fuzzing_confg(config) + self.parse_fuzzing_config(config) self.abi_list = {} # mapping from function to input types for abi encoding if test_case_file: self.run_test_case(test_case_file) @@ -218,10 +218,15 @@ def post_process_input(self, tx_data, inputs, function): "inputs": copy.deepcopy(inputs) }) - tx_data.append({ + tx_entry = { "data": get_transaction_data_from_processed_abi(self.abi_list, function, inputs), "value": [hex(0)] - }) + } + receiver = self.select_receiver() + if receiver: + tx_entry["to"] = receiver + self.raw_inputs[-1]["to"] = receiver + tx_data.append(tx_entry) def run_seed_round(self): @@ -289,28 +294,116 @@ def 
prepare_tx(self, test_case): # print ("testcase" , test_case) return tx - def parse_fuzzing_confg(self, config): - ... + def parse_fuzzing_config(self, config): + with open(config) as f: + config_data = json.load(f) + self.sequence_length = int(config_data.get("sequence_length", 1)) + self.receivers = config_data.get("receivers", []) + self.invariants = config_data.get("invariants", {}) + self.target_address = config_data.get( + "target_address", + self.library.instances[0]["transaction"]["to"], + ) + self.storage_invariants = self.invariants.get("storage", {}) + self.balance_invariants = self.invariants.get("balance", {}) + self.invariant_log_shown = False + + def select_receiver(self): + if not self.receivers: + return None + return random.choice(self.receivers) + + def record_invariant_bug(self, bug_type, detail): + bug_id = f"{bug_type}:{detail}" + if bug_id in self.detected_bugs: + return + self.detected_bugs[bug_id] = DetectedBug( + pc=-1, + bug_type=bug_type, + input={"detail": detail}, + line_info=[], + ) + + def to_int(self, value): + if value is None: + return None + if isinstance(value, int): + return value + if isinstance(value, str) and value.startswith("0x"): + return int(value, 16) + try: + return int(value) + except (ValueError, TypeError): + return None + + def check_invariants(self, step): + if not self.invariants or not self.library.last_result_state: + if not self.invariant_log_shown and DEBUG[0] == "v": + print("Invariant checks skipped (no invariants or result state).") + self.invariant_log_shown = True + return + post_states = self.library.last_result_state.get("post", []) + for idx, item in enumerate(post_states): + state = item.get("state", {}) + target_state = state.get(self.target_address, {}) + storage = target_state.get("storage", {}) + storage_equals = self.storage_invariants.get("equals", {}) + for key, expected in storage_equals.items(): + current = self.to_int(storage.get(key, "0x0")) + expected_val = self.to_int(expected) + if 
current != expected_val: + self.record_invariant_bug( + "storage_equals", + f"{key}:{expected_val}:{current}:{step}:{idx}", + ) + for key in self.storage_invariants.get("nonzero", []): + current = self.to_int(storage.get(key, "0x0")) + if current == 0: + self.record_invariant_bug( + "storage_nonzero", + f"{key}:{current}:{step}:{idx}", + ) + + balance_min = self.balance_invariants.get("min", {}) + for addr, min_val in balance_min.items(): + current = self.to_int(state.get(addr, {}).get("balance", "0x0")) + min_val_int = self.to_int(min_val) + if current is not None and min_val_int is not None and current < min_val_int: + self.record_invariant_bug( + "balance_min", + f"{addr}:{min_val_int}:{current}:{step}:{idx}", + ) + balance_max = self.balance_invariants.get("max", {}) + for addr, max_val in balance_max.items(): + current = self.to_int(state.get(addr, {}).get("balance", "0x0")) + max_val_int = self.to_int(max_val) + if current is not None and max_val_int is not None and current > max_val_int: + self.record_invariant_bug( + "balance_max", + f"{addr}:{max_val_int}:{current}:{step}:{idx}", + ) def run(self, num_iterations=10): for i in range(num_iterations): if DEBUG[0] == "v": print ("\n" + "-"*80) print(f"Iteration {i}\n") - tx_data = [] - self.raw_inputs = [] - for idx in range(self.num_instances): - input, function = self.select_next_input() - new_input = self.mutate(input, function) - if DEBUG[0] == "v": - print(f"Function {function} : {new_input}") - self.post_process_input(tx_data, new_input, function) - - tx_trace = self.library.run_transactions(tx_data) - self.process_tx_trace(tx_trace) - if len(DEBUG) > 1 and DEBUG[1] == "v": - print(f"Iteration {i} : {tx_data}") - pprint(tx_trace) + for step in range(self.sequence_length): + tx_data = [] + self.raw_inputs = [] + for idx in range(self.num_instances): + input, function = self.select_next_input() + new_input = self.mutate(input, function) + if DEBUG[0] == "v": + print(f"Function {function} : {new_input}") + 
self.post_process_input(tx_data, new_input, function) + + tx_trace = self.library.run_transactions(tx_data) + self.process_tx_trace(tx_trace) + self.check_invariants(step) + if len(DEBUG) > 1 and DEBUG[1] == "v": + print(f"Iteration {i} Step {step} : {tx_data}") + pprint(tx_trace) print ("\n\n Final Population \n\n") self.print_population() @@ -376,4 +469,4 @@ def finalize_report(self): fuzzer = Fuzzer(args.input, int(args.num_instances), args.timeout, args.config, contract_name= args.contract_name , output=args.output, test_case_file=args.test_case, random_seed= int(args.random_seed), branch_heuristic=args.branch_heuristic) fuzzer.run(num_iterations=int(args.num_iterations)) - fuzzer.finalize_report() \ No newline at end of file + fuzzer.finalize_report() diff --git a/fuzzing/gpu_fuzzer.py b/fuzzing/gpu_fuzzer.py new file mode 100644 index 0000000..cc5d0a0 --- /dev/null +++ b/fuzzing/gpu_fuzzer.py @@ -0,0 +1,1416 @@ +#!/usr/bin/env python3 +""" +CuEVM GPU Fuzzer for NVIDIA B300 +Complete smart contract fuzzing with full coverage + +This module provides a Python interface to the GPU-accelerated +smart contract fuzzer optimized for NVIDIA B300 GPUs. 
+""" + +import sys +import os +import json +import time +import argparse +import hashlib +import signal +from dataclasses import dataclass, field, asdict +from typing import List, Dict, Optional, Callable, Any, Tuple +from pathlib import Path +from enum import Enum, auto +import random +import struct +from concurrent.futures import ThreadPoolExecutor +from collections import defaultdict +import threading + +# Add paths +sys.path.append(os.path.dirname(os.path.abspath(__file__))) +sys.path.append("./binary/") + +try: + import libcuevm + HAS_GPU = True +except ImportError: + HAS_GPU = False + print("Warning: GPU library not available, running in simulation mode") + +try: + from utils import ( + compile_file, get_transaction_data_from_config, + get_transaction_data_from_processed_abi, + EVMBranch, EVMBug, EVMCall, TraceEvent + ) +except ImportError: + # utils module not available, define minimal stubs + compile_file = None + get_transaction_data_from_config = None + get_transaction_data_from_processed_abi = None + EVMBranch = EVMBug = EVMCall = TraceEvent = None + +try: + from eth_abi import encode as eth_encode +except ImportError: + eth_encode = None + +try: + from eth_utils import function_abi_to_4byte_selector +except ImportError: + def function_abi_to_4byte_selector(func_abi): + """Fallback selector generation using SHA3-256 (keccak)""" + try: + from Crypto.Hash import keccak + name = func_abi.get('name', '') + inputs = func_abi.get('inputs', []) + sig = f"{name}({','.join(i.get('type', '') for i in inputs)})" + k = keccak.new(digest_bits=256) + k.update(sig.encode()) + return k.digest()[:4] + except ImportError: + # Last resort fallback - use SHA256 (not correct for Ethereum but works for testing) + import hashlib + name = func_abi.get('name', '') + inputs = func_abi.get('inputs', []) + sig = f"{name}({','.join(i.get('type', '') for i in inputs)})" + return hashlib.sha256(sig.encode()).digest()[:4] + + +# 
============================================================================ +# Enums and Constants +# ============================================================================ + +class BugSeverity(Enum): + INFORMATIONAL = 0 + LOW = 1 + MEDIUM = 2 + HIGH = 3 + CRITICAL = 4 + + +class BugType(Enum): + INTEGER_OVERFLOW = 0 + INTEGER_UNDERFLOW = 1 + DIVISION_BY_ZERO = 2 + REENTRANCY = 20 + TX_ORIGIN_AUTH = 13 + ETHER_LEAK = 70 + SELFDESTRUCT = 74 + ASSERTION_VIOLATION = 80 + INVARIANT_VIOLATION = 81 + CUSTOM = 200 + + +class MutationType(Enum): + FLIP_BIT = auto() + FLIP_BYTE = auto() + ARITH_INC = auto() + ARITH_DEC = auto() + INTERESTING = auto() + DICTIONARY = auto() + HAVOC = auto() + SPLICE = auto() + EVM_ADDRESS = auto() + EVM_UINT256 = auto() + EVM_SELECTOR = auto() + + +# B300 optimized constants +B300_DEFAULT_BATCH_SIZE = 65536 +B300_MAX_BATCH_SIZE = 524288 +B300_SM_COUNT = 192 + + +# ============================================================================ +# Data Classes +# ============================================================================ + +@dataclass +class FuzzerConfig: + """Configuration for the GPU fuzzer""" + # Batch sizing + num_instances: int = 8192 + sequence_length: int = 1 + auto_tune_batch_size: bool = True + + # Mutation + mutations_per_seed: int = 4 + havoc_iterations: int = 8 + abi_aware_mutation: bool = True + dictionary_mutation: bool = True + + # Coverage + track_edge_coverage: bool = True + track_branch_coverage: bool = True + gradient_guided: bool = True + + # Oracle + check_overflow: bool = True + check_underflow: bool = True + check_reentrancy: bool = True + check_ether_leak: bool = True + + # Corpus + max_corpus_size: int = 16384 + minimize_seeds: bool = True + cull_interval: int = 1000 + + # Scheduling + seed_schedule: str = "weighted" # random, weighted, round-robin + + # Reporting + stats_interval: int = 100 + checkpoint_interval: int = 10000 + verbose: bool = False + + # Limits + max_iterations: int = 0 # 0 = 
unlimited + max_time_seconds: int = 0 + stall_threshold: int = 100000 + + # GPU + gpu_device_id: int = 0 + + def set_for_b300(self): + """Optimize settings for B300 GPU""" + self.num_instances = B300_DEFAULT_BATCH_SIZE + self.mutations_per_seed = 8 + self.havoc_iterations = 16 + self.max_corpus_size = 65536 + + def to_dict(self) -> dict: + return asdict(self) + + @classmethod + def from_dict(cls, d: dict) -> 'FuzzerConfig': + return cls(**{k: v for k, v in d.items() if k in cls.__dataclass_fields__}) + + def save(self, filename: str): + with open(filename, 'w') as f: + json.dump(self.to_dict(), f, indent=2) + + @classmethod + def load(cls, filename: str) -> 'FuzzerConfig': + with open(filename) as f: + return cls.from_dict(json.load(f)) + + +@dataclass +class DetectedBug: + """Represents a detected vulnerability""" + bug_type: BugType + severity: BugSeverity + pc: int + tx_index: int + opcode: int + operand1: int + operand2: int + result: int + description: str + input_data: bytes + source_line: Optional[str] = None + source_file: Optional[str] = None + timestamp: float = field(default_factory=time.time) + + def to_dict(self) -> dict: + return { + 'type': self.bug_type.name, + 'severity': self.severity.name, + 'pc': self.pc, + 'tx_index': self.tx_index, + 'description': self.description, + 'input_data': self.input_data.hex() if self.input_data else None, + 'source_line': self.source_line, + 'timestamp': self.timestamp + } + + +@dataclass +class FuzzerStats: + """Statistics for fuzzing session""" + total_iterations: int = 0 + total_executions: int = 0 + total_transactions: int = 0 + + unique_edges: int = 0 + unique_branches: int = 0 + edge_coverage_percent: float = 0.0 + + total_bugs: int = 0 + critical_bugs: int = 0 + high_bugs: int = 0 + medium_bugs: int = 0 + + corpus_size: int = 0 + seeds_added: int = 0 + interesting_seeds: int = 0 + + total_time_seconds: float = 0.0 + executions_per_second: float = 0.0 + + last_new_coverage_iter: int = 0 + last_bug_iter: int = 
0 + + def update_rates(self): + if self.total_time_seconds > 0: + self.executions_per_second = self.total_executions / self.total_time_seconds + + def to_dict(self) -> dict: + return asdict(self) + + def print_summary(self): + print(f"[{self.total_iterations}] execs: {self.total_executions} " + f"({self.executions_per_second:.0f}/s) | " + f"cov: {self.unique_edges} edges | " + f"bugs: {self.total_bugs} | corpus: {self.corpus_size}") + + +@dataclass +class Seed: + """A seed in the corpus""" + data: bytes + selector: bytes = b'' + params: List[Any] = field(default_factory=list) + param_types: List[str] = field(default_factory=list) + + # Metadata + id: int = 0 + parent_id: int = 0 + generation: int = 0 + + # Coverage info + unique_edges: int = 0 + coverage_hash: int = 0 + coverage_contribution: float = 0.0 + + # Quality + execution_count: int = 0 + mutation_count: int = 0 + bug_count: int = 0 + + # Scheduling + energy: int = 100 + priority: int = 0 + + # For sequences + transactions: List['Seed'] = field(default_factory=list) + sender: Optional[str] = None + value: int = 0 + + +@dataclass +class Invariant: + """Protocol invariant for checking""" + id: int + type: str # storage_equals, balance_min, sum_equals, etc. 
+ description: str + target_address: str + slots: List[str] = field(default_factory=list) + expected_value: Optional[int] = None + min_value: Optional[int] = None + max_value: Optional[int] = None + enabled: bool = True + violation_count: int = 0 + + +# ============================================================================ +# Mutation Engine +# ============================================================================ + +class MutationEngine: + """GPU-style mutation engine for smart contract inputs""" + + # Interesting values for fuzzing + INTERESTING_8 = [-128, -1, 0, 1, 16, 32, 64, 100, 127] + INTERESTING_16 = [-32768, -129, -128, -1, 0, 1, 127, 128, 255, 256, 512, 1000, 1024, 32767] + INTERESTING_32 = [-2147483648, -100663046, -32769, -32768, -129, -128, -1, 0, 1, 127, 128, 255, 256, 512, 1000, 1024, 4096, 32767, 32768, 65535, 65536, 2147483647] + INTERESTING_256 = [ + 0, + 1, + 2**256 - 1, # MAX_UINT256 + 2**255, # MAX_INT256 + 1 + 2**255 - 1, # MAX_INT256 + 2**64, + 2**128, + 10**18, # 1 ETH in wei + ] + + COMMON_SELECTORS = [ + bytes.fromhex('a9059cbb'), # transfer + bytes.fromhex('23b872dd'), # transferFrom + bytes.fromhex('095ea7b3'), # approve + bytes.fromhex('70a08231'), # balanceOf + bytes.fromhex('dd62ed3e'), # allowance + ] + + def __init__(self, seed: int = None): + self.rng = random.Random(seed) + self.dictionary: Dict[str, List[bytes]] = defaultdict(list) + + def mutate(self, data: bytes) -> bytes: + """Apply a random mutation to the input""" + if len(data) == 0: + return self._generate_random(32) + + mutation_type = self.rng.choice([ + self._flip_bit, + self._flip_byte, + self._arith_inc, + self._arith_dec, + self._interesting_value, + self._havoc, + ]) + + return mutation_type(bytearray(data)) + + def _flip_bit(self, data: bytearray) -> bytes: + """Flip a random bit""" + if len(data) == 0: + return bytes(data) + pos = self.rng.randint(0, len(data) - 1) + bit = self.rng.randint(0, 7) + data[pos] ^= (1 << bit) + return bytes(data) + + def 
_flip_byte(self, data: bytearray) -> bytes: + """Flip a random byte""" + if len(data) == 0: + return bytes(data) + pos = self.rng.randint(0, len(data) - 1) + data[pos] ^= 0xFF + return bytes(data) + + def _arith_inc(self, data: bytearray) -> bytes: + """Increment a value""" + if len(data) < 1: + return bytes(data) + pos = self.rng.randint(0, len(data) - 1) + delta = self.rng.randint(1, 35) + data[pos] = (data[pos] + delta) & 0xFF + return bytes(data) + + def _arith_dec(self, data: bytearray) -> bytes: + """Decrement a value""" + if len(data) < 1: + return bytes(data) + pos = self.rng.randint(0, len(data) - 1) + delta = self.rng.randint(1, 35) + data[pos] = (data[pos] - delta) & 0xFF + return bytes(data) + + def _interesting_value(self, data: bytearray) -> bytes: + """Replace with an interesting value""" + if len(data) < 32: + return bytes(data) + + pos = self.rng.randint(0, len(data) - 32) + value = self.rng.choice(self.INTERESTING_256) + value_bytes = value.to_bytes(32, 'big') + for i in range(32): + data[pos + i] = value_bytes[i] + return bytes(data) + + def _havoc(self, data: bytearray) -> bytes: + """Apply multiple random mutations""" + num_mutations = self.rng.randint(2, 8) + for _ in range(num_mutations): + mutation = self.rng.choice([ + self._flip_bit, + self._flip_byte, + self._arith_inc, + self._arith_dec, + ]) + data = bytearray(mutation(data)) + return bytes(data) + + def _generate_random(self, length: int) -> bytes: + """Generate random bytes""" + return bytes(self.rng.getrandbits(8) for _ in range(length)) + + def mutate_address(self, data: bytearray, offset: int) -> bytes: + """Mutate an address parameter""" + if offset + 32 > len(data): + return bytes(data) + # Zero first 12 bytes, randomize last 20 + for i in range(12): + data[offset + i] = 0 + for i in range(20): + data[offset + 12 + i] = self.rng.getrandbits(8) + return bytes(data) + + def mutate_uint256(self, data: bytearray, offset: int) -> bytes: + """Mutate a uint256 parameter""" + if offset + 
32 > len(data): + return bytes(data) + + strategy = self.rng.randint(0, 4) + if strategy == 0: # Zero + for i in range(32): + data[offset + i] = 0 + elif strategy == 1: # Max + for i in range(32): + data[offset + i] = 0xFF + elif strategy == 2: # Interesting + value = self.rng.choice(self.INTERESTING_256) + value_bytes = value.to_bytes(32, 'big') + for i in range(32): + data[offset + i] = value_bytes[i] + elif strategy == 3: # Power of 2 + for i in range(32): + data[offset + i] = 0 + bit = self.rng.randint(0, 255) + byte_pos = 31 - (bit // 8) + bit_pos = bit % 8 + data[offset + byte_pos] = 1 << bit_pos + else: # Random + for i in range(32): + data[offset + i] = self.rng.getrandbits(8) + + return bytes(data) + + def mutate_selector(self, data: bytearray) -> bytes: + """Mutate the function selector""" + if len(data) < 4: + return bytes(data) + + if self.rng.random() < 0.5 and self.COMMON_SELECTORS: + selector = self.rng.choice(self.COMMON_SELECTORS) + else: + selector = bytes(self.rng.getrandbits(8) for _ in range(4)) + + for i in range(4): + data[i] = selector[i] + return bytes(data) + + def add_to_dictionary(self, entry_type: str, data: bytes): + """Add a value to the mutation dictionary""" + if data not in self.dictionary[entry_type]: + self.dictionary[entry_type].append(data) + + def apply_dictionary(self, data: bytearray) -> bytes: + """Apply a dictionary value""" + if not any(self.dictionary.values()): + return bytes(data) + + all_entries = [] + for entries in self.dictionary.values(): + all_entries.extend(entries) + + if not all_entries: + return bytes(data) + + entry = self.rng.choice(all_entries) + if len(entry) > len(data): + return bytes(data) + + offset = self.rng.randint(0, max(0, len(data) - len(entry))) + for i, b in enumerate(entry): + data[offset + i] = b + return bytes(data) + + +# ============================================================================ +# Coverage Tracker +# 
============================================================================ + +class CoverageTracker: + """Track code coverage from EVM execution""" + + def __init__(self, map_size: int = 65536): + self.map_size = map_size + self.edge_bitmap = bytearray(map_size) + self.branch_bitmap = bytearray(map_size) + self.virgin_bits = bytearray([0xFF] * map_size) + + self.unique_edges = 0 + self.unique_branches = 0 + self.total_edges = 0 + + self.edge_set = set() + self.branch_set = set() + + def record_edge(self, from_pc: int, to_pc: int): + """Record an edge (pc transition)""" + edge_hash = ((from_pc >> 1) ^ to_pc) % self.map_size + if self.edge_bitmap[edge_hash] < 255: + self.edge_bitmap[edge_hash] += 1 + self.total_edges += 1 + + edge_key = (from_pc, to_pc) + if edge_key not in self.edge_set: + self.edge_set.add(edge_key) + self.unique_edges = len(self.edge_set) + + def record_branch(self, pc: int, taken: bool, distance: int = 0): + """Record a branch decision""" + branch_hash = (pc ^ (1 if taken else 0)) % self.map_size + if self.branch_bitmap[branch_hash] < 255: + self.branch_bitmap[branch_hash] += 1 + + branch_key = (pc, taken) + if branch_key not in self.branch_set: + self.branch_set.add(branch_key) + self.unique_branches = len(self.branch_set) + + def has_new_bits(self) -> bool: + """Check if there's new coverage""" + for i in range(self.map_size): + if self.edge_bitmap[i] > 0 and self.virgin_bits[i] == 0xFF: + return True + return False + + def update_virgin(self): + """Update virgin bits after finding new coverage""" + for i in range(self.map_size): + if self.edge_bitmap[i] > 0: + self.virgin_bits[i] = 0 + + def merge(self, other: 'CoverageTracker'): + """Merge coverage from another tracker""" + for i in range(self.map_size): + combined = self.edge_bitmap[i] + other.edge_bitmap[i] + self.edge_bitmap[i] = min(255, combined) + + self.edge_set.update(other.edge_set) + self.branch_set.update(other.branch_set) + self.unique_edges = len(self.edge_set) + 
self.unique_branches = len(self.branch_set) + + def compute_hash(self) -> int: + """Compute a hash of the coverage bitmap""" + return hash(bytes(self.edge_bitmap)) + + def get_coverage_percent(self, total_possible: int) -> float: + """Get coverage percentage""" + if total_possible == 0: + return 0.0 + return (self.unique_edges / total_possible) * 100 + + +# ============================================================================ +# Bug Oracle +# ============================================================================ + +class BugOracle: + """Detect bugs during EVM execution""" + + def __init__(self, config: FuzzerConfig): + self.config = config + self.detected_bugs: List[DetectedBug] = [] + self.bug_signatures: set = set() + + def check_arithmetic(self, pc: int, opcode: int, a: int, b: int, result: int, + tx_index: int, input_data: bytes) -> Optional[DetectedBug]: + """Check for arithmetic bugs""" + # ADD overflow + if opcode == 0x01 and self.config.check_overflow: + if a + b >= 2**256: + return self._create_bug( + BugType.INTEGER_OVERFLOW, BugSeverity.HIGH, pc, tx_index, + opcode, a, b, result, "Integer overflow in ADD", input_data + ) + + # SUB underflow + if opcode == 0x03 and self.config.check_underflow: + if a < b: + return self._create_bug( + BugType.INTEGER_UNDERFLOW, BugSeverity.HIGH, pc, tx_index, + opcode, a, b, result, "Integer underflow in SUB", input_data + ) + + # MUL overflow + if opcode == 0x02 and self.config.check_overflow: + if a * b >= 2**256: + return self._create_bug( + BugType.INTEGER_OVERFLOW, BugSeverity.HIGH, pc, tx_index, + opcode, a, b, result, "Integer overflow in MUL", input_data + ) + + # DIV by zero + if opcode in [0x04, 0x05, 0x06, 0x07]: + if b == 0: + return self._create_bug( + BugType.DIVISION_BY_ZERO, BugSeverity.MEDIUM, pc, tx_index, + opcode, a, b, result, "Division/modulo by zero", input_data + ) + + return None + + def check_call(self, pc: int, opcode: int, target: int, value: int, + success: bool, tx_index: int, 
input_data: bytes) -> Optional[DetectedBug]: + """Check for call-related bugs""" + # Ether leak detection + if self.config.check_ether_leak and value > 0 and pc != 0: + return self._create_bug( + BugType.ETHER_LEAK, BugSeverity.HIGH, pc, tx_index, + opcode, target, value, 1 if success else 0, + "Potential ether leak via external call", input_data + ) + return None + + def check_selfdestruct(self, pc: int, beneficiary: int, balance: int, + tx_index: int, input_data: bytes) -> Optional[DetectedBug]: + """Check for selfdestruct vulnerabilities""" + return self._create_bug( + BugType.SELFDESTRUCT, BugSeverity.CRITICAL, pc, tx_index, + 0xFF, beneficiary, balance, 0, + "SELFDESTRUCT called", input_data + ) + + def check_tx_origin(self, pc: int, tx_index: int, input_data: bytes) -> Optional[DetectedBug]: + """Check for tx.origin usage""" + return self._create_bug( + BugType.TX_ORIGIN_AUTH, BugSeverity.MEDIUM, pc, tx_index, + 0x32, 0, 0, 0, + "tx.origin used (potential phishing vulnerability)", input_data + ) + + def _create_bug(self, bug_type: BugType, severity: BugSeverity, + pc: int, tx_index: int, opcode: int, + op1: int, op2: int, result: int, + description: str, input_data: bytes) -> Optional[DetectedBug]: + """Create a bug if not duplicate""" + signature = (bug_type, pc, opcode) + if signature in self.bug_signatures: + return None + + self.bug_signatures.add(signature) + + bug = DetectedBug( + bug_type=bug_type, + severity=severity, + pc=pc, + tx_index=tx_index, + opcode=opcode, + operand1=op1, + operand2=op2, + result=result, + description=description, + input_data=input_data + ) + self.detected_bugs.append(bug) + return bug + + def get_bugs_by_severity(self, min_severity: BugSeverity) -> List[DetectedBug]: + """Get bugs at or above a severity level""" + return [b for b in self.detected_bugs if b.severity.value >= min_severity.value] + + +# ============================================================================ +# Corpus Manager +# 
============================================================================ + +class CorpusManager: + """Manage the corpus of interesting seeds""" + + def __init__(self, max_size: int = 16384): + self.max_size = max_size + self.seeds: List[Seed] = [] + self.seed_id_counter = 0 + self.coverage_hashes: set = set() + + self.total_energy = 0 + self.selection_weights: List[float] = [] + + def add_seed(self, data: bytes, coverage: CoverageTracker, + parent_id: int = 0, check_duplicate: bool = True) -> Optional[Seed]: + """Add a seed to the corpus if interesting""" + coverage_hash = coverage.compute_hash() + + if check_duplicate and coverage_hash in self.coverage_hashes: + return None + + if len(self.seeds) >= self.max_size: + self._cull() + + self.seed_id_counter += 1 + seed = Seed( + data=data, + id=self.seed_id_counter, + parent_id=parent_id, + unique_edges=coverage.unique_edges, + coverage_hash=coverage_hash, + energy=100 + ) + + self.seeds.append(seed) + self.coverage_hashes.add(coverage_hash) + self._update_weights() + + return seed + + def select_seed(self, weighted: bool = True) -> Optional[Seed]: + """Select a seed for mutation""" + if not self.seeds: + return None + + if weighted and self.selection_weights: + return random.choices(self.seeds, weights=self.selection_weights)[0] + return random.choice(self.seeds) + + def update_seed(self, seed: Seed, caused_new_coverage: bool, found_bug: bool): + """Update seed metadata after execution""" + seed.execution_count += 1 + + if caused_new_coverage: + seed.energy += 50 + if found_bug: + seed.energy += 100 + seed.bug_count += 1 + + # Energy decay + seed.energy = max(10, seed.energy - 1) + + self._update_weights() + + def _update_weights(self): + """Update selection weights based on seed energy""" + self.total_energy = sum(s.energy for s in self.seeds) + if self.total_energy > 0: + self.selection_weights = [s.energy / self.total_energy for s in self.seeds] + else: + self.selection_weights = [1.0 / len(self.seeds)] * 
len(self.seeds) if self.seeds else [] + + def _cull(self): + """Remove low-quality seeds to make room""" + if not self.seeds: + return + + # Sort by energy, keep top 75% + self.seeds.sort(key=lambda s: s.energy, reverse=True) + keep_count = int(len(self.seeds) * 0.75) + + removed = self.seeds[keep_count:] + for seed in removed: + self.coverage_hashes.discard(seed.coverage_hash) + + self.seeds = self.seeds[:keep_count] + self._update_weights() + + def save(self, directory: str): + """Save corpus to directory""" + os.makedirs(directory, exist_ok=True) + for seed in self.seeds: + filename = os.path.join(directory, f"seed_{seed.id}.bin") + with open(filename, 'wb') as f: + f.write(seed.data) + + def load(self, directory: str): + """Load corpus from directory""" + if not os.path.exists(directory): + return + + for filename in os.listdir(directory): + if filename.endswith('.bin'): + filepath = os.path.join(directory, filename) + with open(filepath, 'rb') as f: + data = f.read() + self.seed_id_counter += 1 + seed = Seed(data=data, id=self.seed_id_counter) + self.seeds.append(seed) + + self._update_weights() + + +# ============================================================================ +# Invariant Checker +# ============================================================================ + +class InvariantChecker: + """Check protocol invariants""" + + def __init__(self): + self.invariants: List[Invariant] = [] + self.invariant_id_counter = 0 + + def add_invariant(self, inv_type: str, description: str, + target_address: str, **kwargs) -> Invariant: + """Add a new invariant""" + self.invariant_id_counter += 1 + inv = Invariant( + id=self.invariant_id_counter, + type=inv_type, + description=description, + target_address=target_address, + **{k: v for k, v in kwargs.items() if k in Invariant.__dataclass_fields__} + ) + self.invariants.append(inv) + return inv + + def add_erc20_invariants(self, token_address: str): + """Add standard ERC20 invariants""" + self.add_invariant( + 
"balance_non_negative", + "Token balances must be non-negative", + token_address + ) + self.add_invariant( + "total_supply_conserved", + "Total supply must equal sum of balances", + token_address + ) + + def add_balance_invariant(self, address: str, min_val: int = 0, max_val: int = None): + """Add a balance invariant""" + self.add_invariant( + "balance_range", + f"Balance of {address} must be in range", + address, + min_value=min_val, + max_value=max_val + ) + + def check_all(self, state: dict, tx_index: int) -> List[Tuple[Invariant, bool]]: + """Check all invariants against current state""" + results = [] + for inv in self.invariants: + if not inv.enabled: + continue + + violated = self._check_single(inv, state) + if violated: + inv.violation_count += 1 + results.append((inv, violated)) + + return results + + def _check_single(self, inv: Invariant, state: dict) -> bool: + """Check a single invariant""" + if inv.type == "storage_equals": + actual = state.get(inv.target_address, {}).get("storage", {}).get(inv.slots[0], "0x0") + return int(actual, 16) != inv.expected_value + + elif inv.type == "balance_min": + actual = state.get(inv.target_address, {}).get("balance", "0x0") + return int(actual, 16) < inv.min_value + + elif inv.type == "balance_max": + actual = state.get(inv.target_address, {}).get("balance", "0x0") + return int(actual, 16) > inv.max_value if inv.max_value else False + + return False + + def load_from_json(self, filename: str): + """Load invariants from JSON file""" + with open(filename) as f: + data = json.load(f) + + for inv_data in data.get("invariants", []): + self.add_invariant(**inv_data) + + def save_to_json(self, filename: str): + """Save invariants to JSON file""" + data = { + "invariants": [ + { + "type": inv.type, + "description": inv.description, + "target_address": inv.target_address, + "slots": inv.slots, + "expected_value": inv.expected_value, + "min_value": inv.min_value, + "max_value": inv.max_value + } + for inv in self.invariants + 
] + } + with open(filename, 'w') as f: + json.dump(data, f, indent=2) + + +# ============================================================================ +# GPU Fuzzer +# ============================================================================ + +class GPUFuzzer: + """Main GPU-accelerated smart contract fuzzer for NVIDIA B300""" + + def __init__(self, contract_source: str, contract_name: str = None, + config: FuzzerConfig = None): + self.contract_source = contract_source + self.contract_name = contract_name + self.config = config or FuzzerConfig() + + # Initialize components + self.mutation_engine = MutationEngine() + self.coverage = CoverageTracker() + self.oracle = BugOracle(self.config) + self.corpus = CorpusManager(self.config.max_corpus_size) + self.invariant_checker = InvariantChecker() + + # Statistics + self.stats = FuzzerStats() + self.start_time = None + + # Contract info + self.contract_instance = None + self.ast_parser = None + self.abi_list = {} + self.function_list = [] + + # Control + self.running = False + self._stop_requested = False + + # Callbacks + self.progress_callback = None + self.bug_callback = None + + # GPU library wrapper + self.gpu_lib = None + + def initialize(self) -> bool: + """Initialize the fuzzer""" + try: + # Compile contract + self.contract_instance, self.ast_parser = compile_file( + self.contract_source, self.contract_name + ) + + if self.contract_instance is None: + print(f"Error: Failed to compile contract {self.contract_name}") + return False + + # Parse ABI + self._parse_abi() + + # Initialize GPU library if available + if HAS_GPU: + self._init_gpu() + + return True + + except Exception as e: + print(f"Initialization error: {e}") + return False + + def _parse_abi(self): + """Parse contract ABI for function info""" + for item in self.contract_instance.get("abi", []): + if item.get("type") == "function": + name = item.get("name") + if item.get("stateMutability") != "view": + input_types = [inp.get("type") for inp in 
item.get("inputs", [])] + self.abi_list[name] = { + "input_types": input_types, + "4byte": function_abi_to_4byte_selector(item).hex() + } + self.function_list.append(name) + + def _init_gpu(self): + """Initialize GPU resources""" + # This would initialize the CuEVM GPU library + pass + + def add_seed(self, calldata: bytes): + """Add a seed to the initial corpus""" + seed = Seed(data=calldata) + self.corpus.seeds.append(seed) + + def add_function_seed(self, function_name: str, args: List[Any] = None): + """Add a seed for a specific function""" + if function_name not in self.abi_list: + print(f"Warning: Function {function_name} not found in ABI") + return + + abi_info = self.abi_list[function_name] + selector = bytes.fromhex(abi_info["4byte"]) + + if args is None: + args = [] + + if abi_info["input_types"] and args: + encoded_args = encode(abi_info["input_types"], args) + calldata = selector + encoded_args + else: + calldata = selector + + self.add_seed(calldata) + + def generate_initial_seeds(self): + """Generate initial seeds for all functions""" + for func_name in self.function_list: + abi_info = self.abi_list[func_name] + selector = bytes.fromhex(abi_info["4byte"]) + + # Empty args seed + self.add_seed(selector) + + # Generate seeds with default args + input_types = abi_info["input_types"] + if input_types: + default_args = self._generate_default_args(input_types) + encoded = encode(input_types, default_args) + self.add_seed(selector + encoded) + + def _generate_default_args(self, input_types: List[str]) -> List[Any]: + """Generate default argument values""" + args = [] + for t in input_types: + if "int" in t: + args.append(0) + elif "address" in t: + args.append("0x" + "11" * 20) + elif "bool" in t: + args.append(False) + elif "bytes32" in t: + args.append(b'\x00' * 32) + elif "bytes" in t: + args.append(b'') + elif "string" in t: + args.append("") + else: + args.append(0) + return args + + def add_invariant(self, inv: Invariant): + """Add a protocol 
invariant""" + self.invariant_checker.invariants.append(inv) + + def run(self, max_iterations: int = None, max_time: int = None): + """Run the fuzzing loop""" + self.running = True + self._stop_requested = False + self.start_time = time.time() + + max_iter = max_iterations or self.config.max_iterations + max_time = max_time or self.config.max_time_seconds + + iteration = 0 + + print(f"Starting GPU fuzzer...") + print(f"Config: {self.config.num_instances} instances, " + f"corpus: {len(self.corpus.seeds)} seeds") + + while self.running and not self._stop_requested: + # Check stop conditions + if max_iter and iteration >= max_iter: + break + if max_time and (time.time() - self.start_time) >= max_time: + break + if self._check_stall(): + print(f"Stopping: No progress for {self.config.stall_threshold} iterations") + break + + # Run one fuzzing iteration + self._fuzz_iteration() + + iteration += 1 + self.stats.total_iterations = iteration + + # Progress reporting + if iteration % self.config.stats_interval == 0: + self._report_progress() + + self.running = False + self._finalize() + + def _fuzz_iteration(self): + """Execute one fuzzing iteration""" + # Select seeds + seeds_to_run = self._select_seeds() + + # Mutate seeds + mutated_inputs = self._mutate_seeds(seeds_to_run) + + # Execute on GPU + results = self._execute_batch(mutated_inputs) + + # Process results + self._process_results(results, mutated_inputs) + + # Update statistics + self._update_stats() + + def _select_seeds(self) -> List[Seed]: + """Select seeds for this iteration""" + if not self.corpus.seeds: + # No seeds, generate empty input + return [Seed(data=bytes(4))] + + seeds = [] + for _ in range(self.config.num_instances): + seed = self.corpus.select_seed( + weighted=(self.config.seed_schedule == "weighted") + ) + if seed: + seeds.append(seed) + + return seeds + + def _mutate_seeds(self, seeds: List[Seed]) -> List[bytes]: + """Mutate selected seeds""" + mutated = [] + for seed in seeds: + for _ in 
range(self.config.mutations_per_seed): + mutated_data = self.mutation_engine.mutate(seed.data) + mutated.append(mutated_data) + seed.mutation_count += 1 + return mutated + + def _execute_batch(self, inputs: List[bytes]) -> List[dict]: + """Execute batch on GPU""" + results = [] + + if HAS_GPU and self.gpu_lib: + # Use GPU execution + results = self._execute_gpu(inputs) + else: + # Simulation mode + results = self._execute_simulated(inputs) + + self.stats.total_executions += len(inputs) + self.stats.total_transactions += len(inputs) + + return results + + def _execute_simulated(self, inputs: List[bytes]) -> List[dict]: + """Simulated execution for testing""" + results = [] + for inp in inputs: + # Simulate execution result + result = { + "success": True, + "branches": [], + "events": [], + "bugs": [], + "gas_used": 21000 + } + results.append(result) + return results + + def _execute_gpu(self, inputs: List[bytes]) -> List[dict]: + """Execute on GPU using CuEVM""" + # Build transaction data + tx_data = [] + for inp in inputs: + tx = { + "data": ["0x" + inp.hex()], + "value": ["0x0"] + } + tx_data.append(tx) + + # Call GPU library + # This would use libcuevm.run_dict() + return [] + + def _process_results(self, results: List[dict], inputs: List[bytes]): + """Process execution results""" + for i, result in enumerate(results): + input_data = inputs[i] if i < len(inputs) else b'' + + # Process coverage + for branch in result.get("branches", []): + self.coverage.record_edge(branch.get("pc_src", 0), branch.get("pc_dst", 0)) + self.coverage.record_branch( + branch.get("pc_src", 0), + branch.get("pc_dst", 0) != branch.get("pc_missed", 0) + ) + + # Check for bugs + for event in result.get("events", []): + opcode = event.get("opcode", 0) + pc = event.get("pc", 0) + op1 = event.get("operand_1", 0) + op2 = event.get("operand_2", 0) + res = event.get("result", 0) + + bug = self.oracle.check_arithmetic(pc, opcode, op1, op2, res, i, input_data) + if bug and self.bug_callback: + 
self.bug_callback(bug) + + # Check for new coverage + if self.coverage.has_new_bits(): + self.coverage.update_virgin() + self.corpus.add_seed(input_data, self.coverage) + self.stats.seeds_added += 1 + self.stats.last_new_coverage_iter = self.stats.total_iterations + + def _check_stall(self) -> bool: + """Check if fuzzing has stalled""" + if self.config.stall_threshold == 0: + return False + + iters_since_progress = self.stats.total_iterations - max( + self.stats.last_new_coverage_iter, + self.stats.last_bug_iter + ) + return iters_since_progress >= self.config.stall_threshold + + def _update_stats(self): + """Update statistics""" + elapsed = time.time() - self.start_time + self.stats.total_time_seconds = elapsed + self.stats.update_rates() + + self.stats.unique_edges = self.coverage.unique_edges + self.stats.unique_branches = self.coverage.unique_branches + self.stats.total_bugs = len(self.oracle.detected_bugs) + self.stats.corpus_size = len(self.corpus.seeds) + + self.stats.critical_bugs = len([b for b in self.oracle.detected_bugs + if b.severity == BugSeverity.CRITICAL]) + self.stats.high_bugs = len([b for b in self.oracle.detected_bugs + if b.severity == BugSeverity.HIGH]) + + def _report_progress(self): + """Report progress""" + if self.config.verbose: + self.stats.print_summary() + + if self.progress_callback: + self.progress_callback(self.stats) + + def _finalize(self): + """Finalize fuzzing session""" + self._update_stats() + print("\n" + "=" * 80) + print("FUZZING COMPLETE") + print("=" * 80) + self.print_stats() + self.print_bugs() + + def stop(self): + """Request fuzzer to stop""" + self._stop_requested = True + + def print_stats(self): + """Print statistics""" + print(f"\nEXECUTION:") + print(f" Iterations: {self.stats.total_iterations}") + print(f" Executions: {self.stats.total_executions}") + print(f" Time: {self.stats.total_time_seconds:.2f}s") + print(f" Exec/sec: {self.stats.executions_per_second:.2f}") + + print(f"\nCOVERAGE:") + print(f" Unique 
Edges: {self.stats.unique_edges}") + print(f" Unique Branches: {self.stats.unique_branches}") + + print(f"\nBUGS:") + print(f" Total: {self.stats.total_bugs}") + print(f" Critical: {self.stats.critical_bugs}") + print(f" High: {self.stats.high_bugs}") + + print(f"\nCORPUS:") + print(f" Size: {self.stats.corpus_size}") + print(f" Seeds Added: {self.stats.seeds_added}") + + def print_bugs(self): + """Print detected bugs""" + if not self.oracle.detected_bugs: + print("\nNo bugs detected.") + return + + print(f"\n{'=' * 80}") + print("DETECTED BUGS") + print('=' * 80) + + for bug in self.oracle.detected_bugs: + print(f"\n[{bug.severity.name}] {bug.bug_type.name}") + print(f" PC: {bug.pc}") + print(f" Description: {bug.description}") + if bug.input_data: + print(f" Input: {bug.input_data.hex()[:64]}...") + + def export_results(self, directory: str): + """Export results to directory""" + os.makedirs(directory, exist_ok=True) + + # Stats + with open(os.path.join(directory, "stats.json"), 'w') as f: + json.dump(self.stats.to_dict(), f, indent=2) + + # Bugs + bugs_data = [bug.to_dict() for bug in self.oracle.detected_bugs] + with open(os.path.join(directory, "bugs.json"), 'w') as f: + json.dump({"bugs": bugs_data}, f, indent=2) + + # Corpus + corpus_dir = os.path.join(directory, "corpus") + self.corpus.save(corpus_dir) + + print(f"Results exported to {directory}") + + +# ============================================================================ +# CLI +# ============================================================================ + +def main(): + parser = argparse.ArgumentParser( + description="CuEVM GPU Fuzzer for NVIDIA B300 - Smart Contract Fuzzing" + ) + + parser.add_argument("--input", "-i", required=True, help="Solidity source file") + parser.add_argument("--contract", "-c", help="Contract name") + parser.add_argument("--config", help="Configuration file (JSON)") + parser.add_argument("--output", "-o", help="Output directory for results") + + # Fuzzing parameters + 
parser.add_argument("--iterations", "-n", type=int, default=10000, + help="Maximum iterations") + parser.add_argument("--time", "-t", type=int, default=0, + help="Maximum time in seconds (0=unlimited)") + parser.add_argument("--instances", type=int, default=8192, + help="Batch size (instances per iteration)") + + # Options + parser.add_argument("--verbose", "-v", action="store_true", + help="Verbose output") + parser.add_argument("--b300", action="store_true", + help="Use B300-optimized settings") + + # Corpus + parser.add_argument("--seed-dir", help="Directory with initial seeds") + parser.add_argument("--checkpoint", help="Load from checkpoint file") + + # Invariants + parser.add_argument("--invariants", help="Invariants file (JSON)") + + args = parser.parse_args() + + # Create config + config = FuzzerConfig() + if args.config: + config = FuzzerConfig.load(args.config) + if args.b300: + config.set_for_b300() + + config.num_instances = args.instances + config.max_iterations = args.iterations + config.max_time_seconds = args.time + config.verbose = args.verbose + + # Create fuzzer + fuzzer = GPUFuzzer(args.input, args.contract, config) + + if not fuzzer.initialize(): + print("Failed to initialize fuzzer") + sys.exit(1) + + # Load invariants + if args.invariants: + fuzzer.invariant_checker.load_from_json(args.invariants) + + # Load seeds + if args.seed_dir: + fuzzer.corpus.load(args.seed_dir) + else: + fuzzer.generate_initial_seeds() + + # Setup signal handler + def signal_handler(sig, frame): + print("\nStopping fuzzer...") + fuzzer.stop() + + signal.signal(signal.SIGINT, signal_handler) + + # Run fuzzer + fuzzer.run() + + # Export results + if args.output: + fuzzer.export_results(args.output) + + +if __name__ == "__main__": + main() diff --git a/fuzzing/library_wrapper.py b/fuzzing/library_wrapper.py index 45d9c7f..4bbb4a1 100644 --- a/fuzzing/library_wrapper.py +++ b/fuzzing/library_wrapper.py @@ -39,6 +39,7 @@ def __init__( run_eth_tests, ) self.sender = sender 
+ self.last_result_state = None def update_persistent_state(self, json_result): trace_values = json_result @@ -87,6 +88,7 @@ def run_transactions(self, tx_data, skip_trace_parsing=False, measure_performanc if measure_performance: time_start = time.time() result_state = libcuevm.run_dict(self.instances, skip_trace_parsing) + self.last_result_state = result_state if measure_performance: time_end = time.time() print(f"Time taken: {time_end - time_start} seconds") @@ -264,6 +266,8 @@ def build_instance_data(self, tx_data): self.instances[i]["transaction"]["value"] = tx_data[i]["value"] if tx_data[i].get("sender"): self.instances[i]["transaction"]["sender"] = tx_data[i]["sender"] + if tx_data[i].get("to"): + self.instances[i]["transaction"]["to"] = tx_data[i]["to"] # TODO: add other fuzz-able fields diff --git a/plans.md b/plans.md new file mode 100644 index 0000000..74a94e2 --- /dev/null +++ b/plans.md @@ -0,0 +1,57 @@ +# Plan: World‑class GPU‑only CuEVM fuzzing on NVIDIA B300 + +This plan lists **remaining work** needed to make CuEVM a production‑grade, GPU‑only fuzzer with maximum coverage, correctness, and throughput on B300‑class GPUs. + +## 1) Engine + fork coverage (correctness foundation) +- [ ] Implement Osaka / Fulu‑Osaka (Fusaka) fork support in CuEVM (opcodes, precompiles, fork rules, and block context fields). +- [ ] Add fork selection in GPU runner config so fuzzing uses the intended fork rules without CPU gating. +- [ ] Expand EIP‑3155 trace coverage to include all fork‑specific opcodes. +- [ ] Add regression GPU tests for new fork behavior using focused JSON fixtures. + +## 2) Coverage instrumentation + metrics +- [ ] Add on‑GPU coverage counters (branch + opcode + storage write sites). +- [ ] Export coverage summaries per batch to disk (JSON/CSV) for corpus management. +- [ ] Implement a coverage map merge step to guide next‑input selection. +- [ ] Track per‑contract and per‑function coverage for multi‑contract targets. 
+ +## 3) Stateful, multi‑sequence fuzzing (core search) +- [ ] Add sequence‑aware mutation operators (reorder, insert, delete, splice). +- [ ] Persist and replay sequences with deterministic seeds (GPU‑only). +- [ ] Add block‑context mutation (timestamp, number, basefee, prevRandao). +- [ ] Add sender/role mutation and value mutation per transaction. +- [ ] Introduce cross‑contract call graph awareness to drive inter‑contract sequences. + +## 4) Invariants + oracles (signal, not noise) +- [ ] Expand invariant DSL: balance conservation, storage relations, access control, ERC‑4626/AMM/lending templates. +- [ ] Add invariant packs per protocol class with configuration templates. +- [ ] Implement invariant‑guided prioritization (keep cases that violate invariants). +- [ ] Add runtime assertions for invariants in Solidity (optional, but GPU‑only ingestion). + +## 5) Corpus + minimization (production‑grade outputs) +- [ ] Maintain a GPU‑only corpus of “interesting” seeds (coverage increase or invariant violation). +- [ ] Implement delta‑debug minimization for tx sequences. +- [ ] Generate reproducible JSON test cases from minimized sequences. +- [ ] Track unique bug signatures and avoid duplicates. + +## 6) GPU throughput + batch sizing +- [ ] Auto‑tune `num_instances` and `sequence_length` for B300 occupancy. +- [ ] Add batch‑level timers and throughput metrics (tx/s, sequences/s). +- [ ] Add Nsight Systems profile hooks for GPU bottleneck analysis. +- [ ] Introduce pinned memory pools for large batch I/O (where applicable). + +## 7) Reliability + observability +- [ ] Add GPU health checks and hard failure handling (OOM, illegal instruction). +- [ ] Emit structured logs per batch with coverage and invariant stats. +- [ ] Add DCGM exporter and Prometheus/Grafana dashboards for GPU metrics. +- [ ] Add crash‑safe checkpointing of corpus and failing sequences. + +## 8) CI + release hardening +- [ ] Add CI workflow for GPU fuzz smoke tests (short runs). 
+- [ ] Add nightly long‑run GPU fuzz jobs with artifact upload. +- [ ] Pin container base and toolchain versions (NGC + CUDA + CMake). +- [ ] Document reproducible release builds with B300 target settings. + +## 9) Security + governance +- [ ] Threat‑model fuzz runner inputs and harden file handling. +- [ ] Add fuzzing sandbox / resource limits for untrusted targets. +- [ ] Add upgrade checklist for dependencies and GPU drivers. diff --git a/scripts/run-ci-tests-gpu.py b/scripts/run-ci-tests-gpu.py index aadd3a8..1e9e50a 100644 --- a/scripts/run-ci-tests-gpu.py +++ b/scripts/run-ci-tests-gpu.py @@ -9,6 +9,7 @@ container_name = f"cuevm-test-runner-{run_id}" timeout_secs = 1800 max_workers = 1 # limited by RAM size set for each docker process and CPU cores +evm_fork = os.getenv("EVM_FORK", "SHANGHAI") # Run these most time-consuming folders first to have a better chance of completing them slow_folders_with_time = [ @@ -37,6 +38,7 @@ def run_test(folder, timeout_value, run_id, workspace): "--geth", "/goevmlab/gethvm", "--cuevm", "/workspaces/CuEVM/build/cuevm_GPU", "-i", f"/workspaces/CuEVM/ethereum/tests/GeneralStateTests/{folder}", + "--fork", evm_fork, "--ignore-errors", "--without-state-root" ] log_file = os.path.join(workspace, f"{run_id}/test-outputs/{folder}.log") diff --git a/scripts/run-ethtest-by-fork.py b/scripts/run-ethtest-by-fork.py index 659c9c2..93b8fed 100644 --- a/scripts/run-ethtest-by-fork.py +++ b/scripts/run-ethtest-by-fork.py @@ -145,7 +145,7 @@ def runtest_fork(input_directory, output_directory, fork='Shanghai', runtest_bin def main(): import argparse - parser = argparse.ArgumentParser(description='Filter JSON files for entries related to "Shanghai"') + parser = argparse.ArgumentParser(description='Filter JSON files for entries related to the selected fork') parser.add_argument('--input', '-i', type=str, required=True, help='Input directory containing JSON files') parser.add_argument('--temporary-path', '-t', type=str, required=True, 
help='Temporary directory to save the test files') parser.add_argument('--runtest-bin', type=str, required=True, help='goevmlab runtest binary path') @@ -156,6 +156,7 @@ def main(): parser.add_argument('--microtests', action='store_true', help='verify without the state root', default=False) parser.add_argument('--skip-folder', type=str, help='Skip folder', default="") parser.add_argument('--timeout', type=int, help='Timeout in seconds for each test', default=90) + parser.add_argument('--fork', type=str, help='EVM fork name (e.g. Shanghai, Cancun)', default="Shanghai") args = parser.parse_args() global TIME_OUT @@ -167,7 +168,23 @@ def main(): try: test_root = args.input print(f"Running tests for {test_root}") - runtest_fork(test_root, args.temporary_path, fork='Shanghai', runtest_bin=args.runtest_bin, geth_bin=args.geth, + fork = args.fork.strip() + fork_key = fork.upper() + fork_name_map = { + "SHANGHAI": "Shanghai", + "CANCUN": "Cancun", + "PARIS": "Paris", + "BERLIN": "Berlin", + "LONDON": "London", + "ISTANBUL": "Istanbul", + "CONSTANTINOPLE": "Constantinople", + "BYZANTIUM": "Byzantium", + "TANGERINE": "Tangerine", + "DRAGON": "Dragon", + "HOMESTEAD": "Homestead", + } + fork = fork_name_map.get(fork_key, fork) + runtest_fork(test_root, args.temporary_path, fork=fork, runtest_bin=args.runtest_bin, geth_bin=args.geth, cuevm_bin=args.cuevm, ignore_errors=args.ignore_errors, result=result, without_state_root=args.without_state_root, microtests=args.microtests, skip_folder=args.skip_folder) except Exception: