From 3ce57e63e7d0a807772ced7d5c494f8a4eaf742d Mon Sep 17 00:00:00 2001 From: Duncan Ogilvie Date: Wed, 7 Jan 2026 23:12:08 +0100 Subject: [PATCH 1/3] Add example CMake for helpers/hotpatch --- CMakeLists.txt | 10 ++ cmake.toml | 2 + helpers/CMakeLists.txt | 95 +++++++++++++ helpers/aarch64/RemillHelpers.cpp | 125 +++++++++++++++++ helpers/aarch64/RemillHotpatch.cpp | 0 helpers/build.cmake.in | 32 +++++ helpers/x86_32/RemillHelpers.cpp | 211 +++++++++++++++++++++++++++++ helpers/x86_32/RemillHotpatch.cpp | 0 helpers/x86_64/RemillHelpers.cpp | 194 ++++++++++++++++++++++++++ helpers/x86_64/RemillHotpatch.cpp | 0 10 files changed, 669 insertions(+) create mode 100644 helpers/CMakeLists.txt create mode 100644 helpers/aarch64/RemillHelpers.cpp create mode 100644 helpers/aarch64/RemillHotpatch.cpp create mode 100644 helpers/build.cmake.in create mode 100644 helpers/x86_32/RemillHelpers.cpp create mode 100644 helpers/x86_32/RemillHotpatch.cpp create mode 100644 helpers/x86_64/RemillHelpers.cpp create mode 100644 helpers/x86_64/RemillHotpatch.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ab70fa0..e65a9f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,6 +35,16 @@ find_package(LLVM-Wrapper REQUIRED) find_package(remill REQUIRED) +# Subdirectory: helpers +set(CMKR_CMAKE_FOLDER ${CMAKE_FOLDER}) +if(CMAKE_FOLDER) + set(CMAKE_FOLDER "${CMAKE_FOLDER}/helpers") +else() + set(CMAKE_FOLDER helpers) +endif() +add_subdirectory(helpers) +set(CMAKE_FOLDER ${CMKR_CMAKE_FOLDER}) + # Target: remill-example set(remill-example_SOURCES cmake.toml diff --git a/cmake.toml b/cmake.toml index 9c70918..012b5dc 100644 --- a/cmake.toml +++ b/cmake.toml @@ -6,6 +6,8 @@ name = "remill-template" CMAKE_MODULE_PATH = "${CMAKE_SOURCE_DIR}/cmake" GFLAGS_USE_TARGET_NAMESPACE = true +[subdir.helpers] + [find-package.LLVM-Wrapper] [find-package.remill] diff --git a/helpers/CMakeLists.txt b/helpers/CMakeLists.txt new file mode 100644 index 0000000..421441f --- /dev/null +++ 
b/helpers/CMakeLists.txt @@ -0,0 +1,95 @@ +# Get the remill include directories from the target +if(NOT TARGET remill_settings) + message(FATAL_ERROR "Could not find target 'remill_settings' (did remill update?)") +endif() +get_target_property(HELPER_REMILL_INCLUDE_DIR remill_settings INTERFACE_INCLUDE_DIRECTORIES) +if(HELPER_REMILL_INCLUDE_DIR STREQUAL "HELPER_REMILL_INCLUDE_DIR-NOTFOUND") + message(FATAL_ERROR "Could not determine remill include directory") +endif() +set(HELPER_REMILL_SYSROOT_DIR "${HELPER_REMILL_INCLUDE_DIR}/remill/Arch/Runtime/sysroot") +if(NOT EXISTS "${HELPER_REMILL_SYSROOT_DIR}") + message(FATAL_ERROR "Could not find remill sysroot directory: ${HELPER_REMILL_SYSROOT_DIR}") +endif() + +# Locate the clang executable +set(HELPER_CLANG_EXECUTABLE ${LLVM_TOOLS_BINARY_DIR}/clang++${CMAKE_EXECUTABLE_SUFFIX}) +if(NOT EXISTS "${HELPER_CLANG_EXECUTABLE}") + message(FATAL_ERROR "Could not find: ${HELPER_CLANG_EXECUTABLE}") +endif() + +message(STATUS "[helpers] Remill include: ${HELPER_REMILL_INCLUDE_DIR}") +message(STATUS "[helpers] Remill sysroot: ${HELPER_REMILL_SYSROOT_DIR}") +message(STATUS "[helpers] Clang: ${HELPER_CLANG_EXECUTABLE}") + +add_custom_target(helpers) + +function(add_helper arch) + set(HELPER_FLAGS ${ARGN}) + # Generate a CMake script to compile the helpers + # This allows the user to do project-specific helpers and easily recompile them + message(STATUS "[helpers] Adding architecture: ${arch}") + message(STATUS "[helpers] Additional flags: ${HELPER_FLAGS}") + + # TODO: These flags are not exactly the same as remill's + set(HELPER_CLANG_FLAGS + ${HELPER_FLAGS} + -ffreestanding + --sysroot=${HELPER_REMILL_SYSROOT_DIR} + -nostdinc++ + -isystem ${HELPER_REMILL_SYSROOT_DIR} + + -emit-llvm + -std=c++17 + -O3 + -Werror + -Wno-gnu-inline-cpp-without-extern + -Wno-return-type-c-linkage + + -fno-discard-value-names + -fstrict-aliasing + -fno-vectorize + -fno-slp-vectorize + -mllvm -enable-tbaa=true + + "-I${HELPER_REMILL_INCLUDE_DIR}" + ) + 
set(HELPER_BINARY_DIR "${CMAKE_BINARY_DIR}/helpers/${arch}") + set(HELPER_SCRIPT "${HELPER_BINARY_DIR}/build.cmake") + set(HELPER_DIR "${CMAKE_CURRENT_SOURCE_DIR}/${arch}") + configure_file("build.cmake.in" "${HELPER_SCRIPT}" @ONLY) + + # Determine the inputs/outputs for the custom command + set(HELPER_SOURCES + "${HELPER_DIR}/RemillHelpers.cpp" + "${HELPER_DIR}/RemillHotpatch.cpp" + ) + set(HELPER_OUTPUTS "") + foreach(source ${HELPER_SOURCES}) + if(EXISTS "${source}") + get_filename_component(name "${source}" NAME_WE) + list(APPEND HELPER_OUTPUTS + "${HELPER_BINARY_DIR}/${name}.ll" + "${HELPER_BINARY_DIR}/${name}.bc" + ) + endif() + endforeach() + + # Create a custom target to build it automatically when any of the sources are updated + add_custom_command( + OUTPUT ${HELPER_OUTPUTS} + COMMAND "${CMAKE_COMMAND}" -P "${HELPER_SCRIPT}" + DEPENDS ${HELPER_SOURCES} ${HELPER_SCRIPT} + WORKING_DIRECTORY "${HELPER_BINARY_DIR}" + COMMENT "Building helpers for ${arch}" + ) + add_custom_target("helpers-${arch}" ALL + DEPENDS ${HELPER_OUTPUTS} + SOURCES ${HELPER_SOURCES} + ) + add_dependencies(helpers "helpers-${arch}") +endfunction() + +add_helper(aarch64 -target aarch64-none-elf -DADDRESS_SIZE_BITS=64) +add_helper(x86_64 -target x86_64-none-elf -DADDRESS_SIZE_BITS=64 -mlong-double-80) +add_helper(x86_32 -target i386-none-elf -DADDRESS_SIZE_BITS=32 -mlong-double-80) diff --git a/helpers/aarch64/RemillHelpers.cpp b/helpers/aarch64/RemillHelpers.cpp new file mode 100644 index 0000000..ca92c8c --- /dev/null +++ b/helpers/aarch64/RemillHelpers.cpp @@ -0,0 +1,125 @@ +/* + Original author: https://github.com/fvrmatteo + Reference: https://secret.club/2021/09/08/vmprotect-llvm-lifting-1.html + Remill helpers for tests: + https://github.com/lifting-bits/remill/blob/1fb647502b443cbd190e211b18f78979b857fd50/tests/AArch64/Run.cpp#L118-L663 +*/ + +#include + +// NOTE: We disable tail calls because it can cause the DSEPass to make false assumptions +#define HELPER extern "C" 
__attribute__((always_inline)) __attribute__((disable_tail_calls)) + +// Memory layout (0 length arrays treated as a simple pointer to unknown memory) + +extern "C" uint8_t RAM[0]; + +// Implementation of the Remill memory access (read/write) intrinsics + +HELPER uint8_t __remill_read_memory_8(Memory *m, addr_t a) { + uint8_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER uint16_t __remill_read_memory_16(Memory *m, addr_t a) { + uint16_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER uint32_t __remill_read_memory_32(Memory *m, addr_t a) { + uint32_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER uint64_t __remill_read_memory_64(Memory *m, addr_t a) { + uint64_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER Memory *__remill_write_memory_8(Memory *m, addr_t a, uint8_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +HELPER Memory *__remill_write_memory_16(Memory *m, addr_t a, uint16_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +HELPER Memory *__remill_write_memory_32(Memory *m, addr_t a, uint32_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +HELPER Memory *__remill_write_memory_64(Memory *m, addr_t a, uint64_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +// Implementation of the Remill flag and comparison computation intrinsics + +HELPER bool __remill_flag_computation_zero(bool result, ...) { + return result; +} + +HELPER bool __remill_flag_computation_sign(bool result, ...) { + return result; +} + +HELPER bool __remill_flag_computation_overflow(bool result, ...) { + return result; +} + +HELPER bool __remill_flag_computation_carry(bool result, ...) 
{ + return result; +} + +HELPER bool __remill_compare_sle(bool result) { + return result; +} + +HELPER bool __remill_compare_slt(bool result) { + return result; +} + +HELPER bool __remill_compare_sge(bool result) { + return result; +} + +HELPER bool __remill_compare_sgt(bool result) { + return result; +} + +HELPER bool __remill_compare_ule(bool result) { + return result; +} + +HELPER bool __remill_compare_ult(bool result) { + return result; +} + +HELPER bool __remill_compare_ugt(bool result) { + return result; +} + +HELPER bool __remill_compare_uge(bool result) { + return result; +} + +HELPER bool __remill_compare_eq(bool result) { + return result; +} + +HELPER bool __remill_compare_neq(bool result) { + return result; +} + +// Implementation of the remill hint calls + +HELPER Memory *__remill_function_return(State *, addr_t, Memory *memory) { + return memory; +} \ No newline at end of file diff --git a/helpers/aarch64/RemillHotpatch.cpp b/helpers/aarch64/RemillHotpatch.cpp new file mode 100644 index 0000000..e69de29 diff --git a/helpers/build.cmake.in b/helpers/build.cmake.in new file mode 100644 index 0000000..317b22e --- /dev/null +++ b/helpers/build.cmake.in @@ -0,0 +1,32 @@ +# To compile the helpers: +# cmake -P build.cmake + +set(HELPER_CLANG_FLAGS "@HELPER_CLANG_FLAGS@") +set(HELPER_CLANG_EXECUTABLE "@HELPER_CLANG_EXECUTABLE@") +set(HELPER_DIR "@HELPER_DIR@") + +message(STATUS "[@arch@] Directory: ${CMAKE_CURRENT_BINARY_DIR}") + +function(compile_helper basename) + set(source "${HELPER_DIR}/${basename}.cpp") + + if(NOT EXISTS "${source}") + message(STATUS "[@arch@] Not found: ${basename}.cpp (skipping)") + return() + endif() + + message(STATUS "[@arch@] Compiling ${basename}.cpp") + execute_process( + COMMAND "${HELPER_CLANG_EXECUTABLE}" -c "${source}" ${HELPER_CLANG_FLAGS} + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMAND_ERROR_IS_FATAL ANY + ) + execute_process( + COMMAND "${HELPER_CLANG_EXECUTABLE}" -S "${source}" ${HELPER_CLANG_FLAGS} + 
WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMAND_ERROR_IS_FATAL ANY + ) +endfunction() + +compile_helper(RemillHelpers) +compile_helper(RemillHotpatch) diff --git a/helpers/x86_32/RemillHelpers.cpp b/helpers/x86_32/RemillHelpers.cpp new file mode 100644 index 0000000..2bc82f4 --- /dev/null +++ b/helpers/x86_32/RemillHelpers.cpp @@ -0,0 +1,211 @@ +/* + Original author: https://github.com/fvrmatteo + Reference: https://secret.club/2021/09/08/vmprotect-llvm-lifting-1.html + Remill helpers for tests: + https://github.com/lifting-bits/remill/blob/1fb647502b443cbd190e211b18f78979b857fd50/tests/X86/Run.cpp#L156-L722 +*/ + +#include + +// NOTE: We disable tail calls because it can cause the DSEPass to make false assumptions +#define HELPER extern "C" __attribute__((always_inline)) __attribute__((disable_tail_calls)) + +// Memory layout (0 length arrays treated as a simple pointer to unknown memory) + +extern "C" uint8_t RAM[0]; + +// For segment bases to zero + +HELPER uint32_t __remill_symbolic_CSBASE() { + return 0; +} + +HELPER uint32_t __remill_symbolic_SSBASE() { + return 0; +} + +HELPER uint32_t __remill_symbolic_ESBASE() { + return 0; +} + +HELPER uint32_t __remill_symbolic_DSBASE() { + return 0; +} + +// Implementation of the Remill memory access (read/write) intrinsics + +HELPER uint8_t __remill_read_memory_8(Memory *m, addr_t a) { + uint8_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER uint16_t __remill_read_memory_16(Memory *m, addr_t a) { + uint16_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER uint32_t __remill_read_memory_32(Memory *m, addr_t a) { + uint32_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER uint64_t __remill_read_memory_64(Memory *m, addr_t a) { + uint64_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER Memory *__remill_write_memory_8(Memory *m, addr_t a, uint8_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return 
m; +} + +HELPER Memory *__remill_write_memory_16(Memory *m, addr_t a, uint16_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +HELPER Memory *__remill_write_memory_32(Memory *m, addr_t a, uint32_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +HELPER Memory *__remill_write_memory_64(Memory *m, addr_t a, uint64_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +HELPER uint8_t __remill_read_memory_f8(Memory *m, addr_t a) { + uint8_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER uint16_t __remill_read_memory_f16(Memory *m, addr_t a) { + uint16_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER uint32_t __remill_read_memory_f32(Memory *m, addr_t a) { + uint32_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER uint64_t __remill_read_memory_f64(Memory *m, addr_t a) { + uint64_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER Memory *__remill_write_memory_f8(Memory *m, addr_t a, uint8_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +HELPER Memory *__remill_write_memory_f16(Memory *m, addr_t a, uint16_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +HELPER Memory *__remill_write_memory_f32(Memory *m, addr_t a, uint32_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +HELPER Memory *__remill_write_memory_f64(Memory *m, addr_t a, uint64_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +// Implementation of the Remill flag and comparison computation intrinsics + +HELPER bool __remill_flag_computation_zero(bool result, ...) { + return result; +} + +HELPER bool __remill_flag_computation_sign(bool result, ...) { + return result; +} + +HELPER bool __remill_flag_computation_overflow(bool result, ...) { + return result; +} + +HELPER bool __remill_flag_computation_carry(bool result, ...) 
{ + return result; +} + +HELPER bool __remill_compare_sle(bool result) { + return result; +} + +HELPER bool __remill_compare_slt(bool result) { + return result; +} + +HELPER bool __remill_compare_sge(bool result) { + return result; +} + +HELPER bool __remill_compare_sgt(bool result) { + return result; +} + +HELPER bool __remill_compare_ule(bool result) { + return result; +} + +HELPER bool __remill_compare_ult(bool result) { + return result; +} + +HELPER bool __remill_compare_ugt(bool result) { + return result; +} + +HELPER bool __remill_compare_uge(bool result) { + return result; +} + +HELPER bool __remill_compare_eq(bool result) { + return result; +} + +HELPER bool __remill_compare_neq(bool result) { + return result; +} + +// Implementation of the Remill undefined values + +HELPER uint8_t __remill_undefined_8() { + return 0; +} + +HELPER uint16_t __remill_undefined_16() { + return 0; +} + +HELPER uint32_t __remill_undefined_32() { + return 0; +} + +HELPER uint64_t __remill_undefined_64() { + return 0; +} + +// Hack for DIV + +HELPER Memory *__remill_error(State *, addr_t, Memory *) { + __builtin_unreachable(); +} + +// Implementation of the remill hint calls + +HELPER Memory *__remill_function_return(State *, addr_t, Memory *memory) { + return memory; +} diff --git a/helpers/x86_32/RemillHotpatch.cpp b/helpers/x86_32/RemillHotpatch.cpp new file mode 100644 index 0000000..e69de29 diff --git a/helpers/x86_64/RemillHelpers.cpp b/helpers/x86_64/RemillHelpers.cpp new file mode 100644 index 0000000..c3e6e03 --- /dev/null +++ b/helpers/x86_64/RemillHelpers.cpp @@ -0,0 +1,194 @@ +/* + Original author: https://github.com/fvrmatteo + Reference: https://secret.club/2021/09/08/vmprotect-llvm-lifting-1.html + Remill helpers for tests: + https://github.com/lifting-bits/remill/blob/1fb647502b443cbd190e211b18f78979b857fd50/tests/X86/Run.cpp#L156-L722 +*/ + +#include + +// NOTE: We disable tail calls because it can cause the DSEPass to make false assumptions +#define HELPER extern 
"C" __attribute__((always_inline)) __attribute__((disable_tail_calls)) + +// Memory layout (0 length arrays treated as a simple pointer to unknown memory) + +extern "C" uint8_t RAM[0]; + +// Implementation of the Remill memory access (read/write) intrinsics + +HELPER uint8_t __remill_read_memory_8(Memory *m, addr_t a) { + uint8_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER uint16_t __remill_read_memory_16(Memory *m, addr_t a) { + uint16_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER uint32_t __remill_read_memory_32(Memory *m, addr_t a) { + uint32_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER uint64_t __remill_read_memory_64(Memory *m, addr_t a) { + uint64_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER Memory *__remill_write_memory_8(Memory *m, addr_t a, uint8_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +HELPER Memory *__remill_write_memory_16(Memory *m, addr_t a, uint16_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +HELPER Memory *__remill_write_memory_32(Memory *m, addr_t a, uint32_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +HELPER Memory *__remill_write_memory_64(Memory *m, addr_t a, uint64_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +HELPER uint8_t __remill_read_memory_f8(Memory *m, addr_t a) { + uint8_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER uint16_t __remill_read_memory_f16(Memory *m, addr_t a) { + uint16_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER uint32_t __remill_read_memory_f32(Memory *m, addr_t a) { + uint32_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER uint64_t __remill_read_memory_f64(Memory *m, addr_t a) { + uint64_t v = 0; + __builtin_memcpy(&v, &RAM[a], sizeof(v)); + return v; +} + +HELPER Memory *__remill_write_memory_f8(Memory *m, 
addr_t a, uint8_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +HELPER Memory *__remill_write_memory_f16(Memory *m, addr_t a, uint16_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +HELPER Memory *__remill_write_memory_f32(Memory *m, addr_t a, uint32_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +HELPER Memory *__remill_write_memory_f64(Memory *m, addr_t a, uint64_t v) { + __builtin_memcpy(&RAM[a], &v, sizeof(v)); + return m; +} + +// Implementation of the Remill flag and comparison computation intrinsics + +HELPER bool __remill_flag_computation_zero(bool result, ...) { + return result; +} + +HELPER bool __remill_flag_computation_sign(bool result, ...) { + return result; +} + +HELPER bool __remill_flag_computation_overflow(bool result, ...) { + return result; +} + +HELPER bool __remill_flag_computation_carry(bool result, ...) { + return result; +} + +HELPER bool __remill_compare_sle(bool result) { + return result; +} + +HELPER bool __remill_compare_slt(bool result) { + return result; +} + +HELPER bool __remill_compare_sge(bool result) { + return result; +} + +HELPER bool __remill_compare_sgt(bool result) { + return result; +} + +HELPER bool __remill_compare_ule(bool result) { + return result; +} + +HELPER bool __remill_compare_ult(bool result) { + return result; +} + +HELPER bool __remill_compare_ugt(bool result) { + return result; +} + +HELPER bool __remill_compare_uge(bool result) { + return result; +} + +HELPER bool __remill_compare_eq(bool result) { + return result; +} + +HELPER bool __remill_compare_neq(bool result) { + return result; +} + +// Implementation of the Remill undefined values + +HELPER uint8_t __remill_undefined_8() { + return 0; +} + +HELPER uint16_t __remill_undefined_16() { + return 0; +} + +HELPER uint32_t __remill_undefined_32() { + return 0; +} + +HELPER uint64_t __remill_undefined_64() { + return 0; +} + +// Hack for DIV (in reality you want to overload the semantics) + +HELPER 
Memory *__remill_error(State *, addr_t, Memory *) { + __builtin_unreachable(); +} + +// Implementation of the remill hint calls + +HELPER Memory *__remill_function_return(State *, addr_t, Memory *memory) { + return memory; +} + diff --git a/helpers/x86_64/RemillHotpatch.cpp b/helpers/x86_64/RemillHotpatch.cpp new file mode 100644 index 0000000..e69de29 From b6d4da45f43ff1bfc4608d7455210e40d9b2b0ef Mon Sep 17 00:00:00 2001 From: Duncan Ogilvie Date: Wed, 7 Jan 2026 23:34:43 +0100 Subject: [PATCH 2/3] Add example CPUID hotpatching https://shittycodingagent.ai/session/?aa9e2562ea7dc4e2655ed64f5344b9c2 --- helpers/x86_64/RemillHotpatch.cpp | 31 ++++ src/example.cpp | 287 ++++++++++++++++++++---------- src/exepath.hpp | 53 ++++++ 3 files changed, 278 insertions(+), 93 deletions(-) create mode 100644 src/exepath.hpp diff --git a/helpers/x86_64/RemillHotpatch.cpp b/helpers/x86_64/RemillHotpatch.cpp index e69de29..2b35dcb 100644 --- a/helpers/x86_64/RemillHotpatch.cpp +++ b/helpers/x86_64/RemillHotpatch.cpp @@ -0,0 +1,31 @@ +#include +#include +#include +#include +#include +#include +#include + +// CPUID implementation + +typedef struct { + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; +} CPUIDResult; + +extern "C" void __remill_cpuid(Memory *memory, CPUIDResult *result, + uint32_t eax, uint32_t ecx); + +DEF_SEM(CPUID) { + CPUIDResult result; + __remill_cpuid(memory, &result, state.gpr.rax.dword, state.gpr.rcx.dword); + state.gpr.rax.qword = result.eax; + state.gpr.rbx.qword = result.ebx; + state.gpr.rcx.qword = result.ecx; + state.gpr.rdx.qword = result.edx; + return memory; +} + +DEF_ISEL(CPUID) = CPUID; diff --git a/src/example.cpp b/src/example.cpp index 9b95cf1..0eb782a 100644 --- a/src/example.cpp +++ b/src/example.cpp @@ -1,93 +1,194 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -int main(int argc, char **argv) { - 
google::InitGoogleLogging(argv[0]); - - llvm::LLVMContext context; - auto arch = remill::Arch::Get(context, "linux", "amd64"); - if (!arch) { - llvm::outs() << "Failed to get architecture\n"; - return EXIT_FAILURE; - } - - auto semantics = remill::LoadArchSemantics(arch.get()); - if (!semantics) { - llvm::outs() << "Failed to load architecture semantics\n"; - return EXIT_FAILURE; - } - - auto intrinsics = arch->GetInstrinsicTable(); - if (!intrinsics) { - llvm::outs() << "Failed to get intrinsic table\n"; - return EXIT_FAILURE; - } - - // mov rcx, 1337 - uint8_t instr_bytes[] = {0x48, 0xc7, 0xc1, 0x39, 0x05, 0x00, 0x00}; - std::string_view instr_view(reinterpret_cast(instr_bytes), - sizeof(instr_bytes)); - remill::Instruction instruction; - remill::DecodingContext decoding_context = arch->CreateInitialContext(); - if (!arch->DecodeInstruction(0x1000, instr_view, instruction, - decoding_context)) { - llvm::outs() << "Failed to decode instruction\n"; - return EXIT_FAILURE; - } - - auto function = arch->DefineLiftedFunction("lifted_example", semantics.get()); - auto block = &function->getEntryBlock(); - auto lifter = instruction.GetLifter(); - auto status = lifter->LiftIntoBlock(instruction, block); - if (status != remill::kLiftedInstruction) { - llvm::outs() << "Failed to lift instruction\n"; - return EXIT_FAILURE; - } - - // Finish the lifted block by returning the memory pointer. 
- llvm::IRBuilder<> ir(block); - ir.CreateRet(remill::LoadMemoryPointer(block, *intrinsics)); - - // Print called functions - llvm::outs() << "[unoptimized]\n"; - auto printed = std::set{}; - for (auto &basic_block : *function) { - for (auto &instruction : basic_block) { - if (auto caller = llvm::dyn_cast(&instruction)) { - auto callee = caller->getCalledFunction(); - if (!printed.count(callee)) { - auto name = callee->getName().str(); - llvm::outs() << "; Mangled name: " << name << "\n"; - callee->setName(llvm::demangle(name)); - llvm::outs() << *callee << "\n"; - printed.insert(callee); - } - } - } - } - function->print(llvm::outs()); - - remill::OptimizeModule(arch.get(), semantics.get(), {function}); - - llvm::outs() << "\n[optimized]\n"; - function->print(llvm::outs()); - - return EXIT_SUCCESS; -} +#include +#include + +#include "exepath.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/// Hotpatch remill semantics by loading a bitcode module and linking it. +/// +/// Remill instruction selection works via ISEL_* global variables that point to +/// semantic functions. For example, ISEL_CPUID points to the function that +/// implements the CPUID instruction. +/// +/// To hotpatch an instruction: +/// 1. Create a .cpp file with the remill runtime headers +/// 2. Define a semantic function using DEF_SEM(name) { ... } +/// 3. Register it with DEF_ISEL(INSTRUCTION_NAME) = semantic_function; +/// 4. Compile to bitcode and link into the semantics module +/// +/// See helpers/x86_64/RemillHotpatch.cpp for an example. 
+static bool hotpatchRemill(llvm::Module &module, + const std::string &hotpatchPath) { + if (!std::filesystem::exists(hotpatchPath)) { + llvm::errs() << "Hotpatch file not found: " << hotpatchPath << "\n"; + return false; + } + + llvm::SMDiagnostic error; + auto patchModule = + llvm::parseIRFile(hotpatchPath, error, module.getContext()); + if (!patchModule) { + llvm::errs() << "Failed to parse hotpatch module: " << error.getMessage() + << "\n"; + return false; + } + + // Prepare the patch module to be compatible with the semantics module + patchModule->setDataLayout(module.getDataLayout()); + patchModule->setTargetTriple(module.getTargetTriple()); + + // Rename existing ISEL_ globals to avoid conflicts during linking + // The hotpatch module's ISEL_ globals will take precedence + for (const auto &global : patchModule->globals()) { + const auto &globalName = global.getName().str(); + // Check if this is a hotpatch ISEL global variable + if (globalName.rfind("ISEL_", 0) == 0) { + // Find and rename the existing global in the semantics module + auto *existingGlobal = module.getGlobalVariable(globalName); + if (existingGlobal) { + existingGlobal->setName(globalName + "_original"); + llvm::outs() << "Hotpatching: " << globalName << "\n"; + } + } + } + + // Link the hotpatch module into the semantics module + // OverrideFromSrc ensures the hotpatch definitions take precedence + if (llvm::Linker::linkModules(module, std::move(patchModule), + llvm::Linker::Flags::OverrideFromSrc)) { + llvm::errs() << "Failed to link hotpatch module\n"; + return false; + } + + return true; +} + +int main(int argc, char **argv) { + google::InitGoogleLogging(argv[0]); + + llvm::LLVMContext context; + auto arch = remill::Arch::Get(context, "linux", "amd64"); + if (!arch) { + llvm::outs() << "Failed to get architecture\n"; + return EXIT_FAILURE; + } + + auto semantics = remill::LoadArchSemantics(arch.get()); + if (!semantics) { + llvm::outs() << "Failed to load architecture semantics\n"; + 
return EXIT_FAILURE; + } + + // Apply hotpatch to remill semantics + // The hotpatch module is built by the helpers target + // (helpers/x86_64/RemillHotpatch.cpp) It provides custom implementations for + // specific instructions like CPUID + auto hotpatchPath = executableDir() / "helpers/x86_64/RemillHotpatch.bc"; + if (argc > 1) { + hotpatchPath = argv[1]; + } + + if (std::filesystem::exists(hotpatchPath)) { + llvm::outs() << "Applying hotpatch from: " << hotpatchPath.string() << "\n"; + if (!hotpatchRemill(*semantics, hotpatchPath.string())) { + llvm::outs() << "Warning: Failed to apply hotpatch\n"; + } + } else { + llvm::outs() << "No hotpatch file found at: " << hotpatchPath.string() + << "\n"; + } + + auto intrinsics = arch->GetInstrinsicTable(); + if (!intrinsics) { + llvm::outs() << "Failed to get intrinsic table\n"; + return EXIT_FAILURE; + } + + // Example 1: Lift a simple instruction (mov rcx, 1337) + llvm::outs() << "\n=== Lifting: mov rcx, 1337 ===\n"; + { + uint8_t instr_bytes[] = {0x48, 0xc7, 0xc1, 0x39, 0x05, 0x00, 0x00}; + std::string_view instr_view(reinterpret_cast(instr_bytes), + sizeof(instr_bytes)); + remill::Instruction instruction; + remill::DecodingContext decoding_context = arch->CreateInitialContext(); + if (!arch->DecodeInstruction(0x1000, instr_view, instruction, + decoding_context)) { + llvm::outs() << "Failed to decode instruction\n"; + return EXIT_FAILURE; + } + + auto function = arch->DefineLiftedFunction("lifted_mov", semantics.get()); + auto block = &function->getEntryBlock(); + auto lifter = instruction.GetLifter(); + auto status = lifter->LiftIntoBlock(instruction, block); + if (status != remill::kLiftedInstruction) { + llvm::outs() << "Failed to lift instruction\n"; + return EXIT_FAILURE; + } + + llvm::IRBuilder<> ir(block); + ir.CreateRet(remill::LoadMemoryPointer(block, *intrinsics)); + + remill::OptimizeModule(arch.get(), semantics.get(), {function}); + llvm::outs() << "[optimized]\n"; + function->print(llvm::outs()); + } + 
+ // Example 2: Lift CPUID instruction (demonstrates hotpatching) + llvm::outs() << "\n=== Lifting: cpuid ===\n"; + { + uint8_t instr_bytes[] = {0x0f, 0xa2}; // cpuid + std::string_view instr_view(reinterpret_cast(instr_bytes), + sizeof(instr_bytes)); + remill::Instruction instruction; + remill::DecodingContext decoding_context = arch->CreateInitialContext(); + if (!arch->DecodeInstruction(0x2000, instr_view, instruction, + decoding_context)) { + llvm::outs() << "Failed to decode CPUID instruction\n"; + return EXIT_FAILURE; + } + + auto function = arch->DefineLiftedFunction("lifted_cpuid", semantics.get()); + auto block = &function->getEntryBlock(); + auto lifter = instruction.GetLifter(); + auto status = lifter->LiftIntoBlock(instruction, block); + if (status != remill::kLiftedInstruction) { + llvm::outs() << "Failed to lift CPUID instruction\n"; + return EXIT_FAILURE; + } + + llvm::IRBuilder<> ir(block); + ir.CreateRet(remill::LoadMemoryPointer(block, *intrinsics)); + + // Print unoptimized to see the hotpatched implementation + llvm::outs() << "[unoptimized]\n"; + function->print(llvm::outs()); + + remill::OptimizeModule(arch.get(), semantics.get(), {function}); + llvm::outs() << "\n[optimized]\n"; + function->print(llvm::outs()); + } + + return EXIT_SUCCESS; +} diff --git a/src/exepath.hpp b/src/exepath.hpp new file mode 100644 index 0000000..e4e3475 --- /dev/null +++ b/src/exepath.hpp @@ -0,0 +1,53 @@ +#include + +// Based on: https://github.com/Coollab-Art/exe_path (BSL-1.0) + +#if defined(_WIN32) +#include +#include + +static auto executable_path_impl() -> std::filesystem::path { + char path[MAX_PATH] = {}; // NOLINT(*avoid-c-arrays) + GetModuleFileNameA(nullptr, path, MAX_PATH); + return path; +} +#elif defined(__linux__) +#include +#if defined(__sun) +#define PROC_SELF_EXE "/proc/self/path/a.out" +#else +#define PROC_SELF_EXE "/proc/self/exe" +#endif + +static auto executable_path_impl() -> std::filesystem::path { + char buffer[PATH_MAX]; + const char *const 
path = realpath(PROC_SELF_EXE, buffer); + return path != nullptr ? path : ""; +} +#elif defined(__APPLE__) +#include +#include + +static auto executable_path_impl() -> std::filesystem::path { + char raw_path_name[PATH_MAX]; + char real_path_name[PATH_MAX] = {}; + auto raw_path_size = static_cast(sizeof(raw_path_name)); + if (!_NSGetExecutablePath(raw_path_name, &raw_path_size)) { + realpath(raw_path_name, real_path_name); + } + return real_path_name; +} +#else +#error "Unsupported platform" +#endif + +inline std::filesystem::path executablePath() { + static auto const path = + std::filesystem::weakly_canonical(executable_path_impl()); + return path; +} + +inline std::filesystem::path executableDir() { + static auto const path = executablePath().parent_path(); + return path; +} From 042c426f7c54dab6a0b9a366a22ff40e1eed6fc2 Mon Sep 17 00:00:00 2001 From: Duncan Ogilvie Date: Wed, 7 Jan 2026 23:55:18 +0100 Subject: [PATCH 3/3] Fix Windows build --- src/example.cpp | 1 + src/exepath.hpp | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/example.cpp b/src/example.cpp index 0eb782a..1950473 100644 --- a/src/example.cpp +++ b/src/example.cpp @@ -4,6 +4,7 @@ #include "exepath.hpp" #include + #include #include #include diff --git a/src/exepath.hpp b/src/exepath.hpp index e4e3475..ce9f731 100644 --- a/src/exepath.hpp +++ b/src/exepath.hpp @@ -3,7 +3,9 @@ // Based on: https://github.com/Coollab-Art/exe_path (BSL-1.0) #if defined(_WIN32) -#include +// Prevent Windows.h from pulling in a ton of crap that defines macros +// like 'small', 'near', 'far', etc. that conflict with normal code. +#define WIN32_LEAN_AND_MEAN #include static auto executable_path_impl() -> std::filesystem::path {