From 62636ba409c41c7e9ac7470235ae27ebc8f98b7d Mon Sep 17 00:00:00 2001 From: leegao Date: Wed, 13 Aug 2025 21:54:34 +1000 Subject: [PATCH 1/2] Initial harness for recording BCn artifacts, also synchronize all VkQueue dispatched functions (make OneByOne mode robust) --- src/vulkan/wrapper/artifacts.cpp | 39 ++++++ src/vulkan/wrapper/artifacts.h | 13 +++ src/vulkan/wrapper/meson.build | 1 + src/vulkan/wrapper/vk_entrypoints.py | 3 + src/vulkan/wrapper/wrapper_debug.c | 28 +---- src/vulkan/wrapper/wrapper_device.c | 123 ++++++++------------- src/vulkan/wrapper/wrapper_device_memory.c | 1 + src/vulkan/wrapper/wrapper_objects.h | 26 +++-- src/vulkan/wrapper/wrapper_private.h | 8 +- 9 files changed, 126 insertions(+), 116 deletions(-) create mode 100644 src/vulkan/wrapper/artifacts.cpp create mode 100644 src/vulkan/wrapper/artifacts.h diff --git a/src/vulkan/wrapper/artifacts.cpp b/src/vulkan/wrapper/artifacts.cpp new file mode 100644 index 00000000000..7b588cb1545 --- /dev/null +++ b/src/vulkan/wrapper/artifacts.cpp @@ -0,0 +1,39 @@ +#include "artifacts.h" +#include "wrapper_objects.h" +#include "wrapper_private.h" + +#include <string> + +// Opens the artifact file named id_postfix for writing. The per-process +// artifacts directory is created on the first call. Returns NULL when fopen() fails. +static FILE* open_log_file(const char* postfix, int id) { + static char dir[256]; + static bool initialized = false; + if (!initialized) { + initialized = true; + char time_str[20]; + get_current_time_string(time_str, sizeof(time_str)); + snprintf(dir, sizeof(dir), "/sdcard/Documents/Wrapper/artifacts_%s.%s.%d", time_str, getprogname(), getpid()); + // mkdir() returns 0 on success and -1 on failure. + if (mkdir(dir, 0777) != 0) { + WLOGE("Failed to create the artifacts directory %s", dir); + } else { + WLOGD("Logging artifacts to %s", dir); + } + } + std::string path = std::string(dir) + "/" + std::to_string(id) + "_" + postfix; + return fopen(path.c_str(), "w"); +} + +// Hook for recording the artifacts of one BCn decode. Currently it only checks +// that srcBuffer is tracked; the recording itself is still a TODO. +extern "C" +void RecordBCnArtifacts(struct wrapper_device* device, const VkBufferImageCopy* region, VkBuffer srcBuffer, VkBuffer stagingBuffer, int decode_id) { + // auto fd = open_log_file("region.txt", decode_id); + struct wrapper_buffer* wbuf 
= get_wrapper_buffer(device, srcBuffer); + if (!wbuf) { + WLOGE("srcBuffer not tracked, skipping (decode_id=%d)", decode_id); + return; + } + // TODO: Implement this +} diff --git a/src/vulkan/wrapper/artifacts.h b/src/vulkan/wrapper/artifacts.h new file mode 100644 index 00000000000..91333c6a6cf --- /dev/null +++ b/src/vulkan/wrapper/artifacts.h @@ -0,0 +1,13 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include <vulkan/vulkan.h> + +void RecordBCnArtifacts(struct wrapper_device* device, const VkBufferImageCopy* region, VkBuffer srcBuffer, VkBuffer stagingBuffer, int decode_id); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/src/vulkan/wrapper/meson.build b/src/vulkan/wrapper/meson.build index 51a2fd792c4..975367bdfff 100644 --- a/src/vulkan/wrapper/meson.build +++ b/src/vulkan/wrapper/meson.build @@ -122,6 +122,7 @@ wrapper_files = files( 'wrapper_objects.c', 'spirv_edit.cpp', 'graphics_env_hooks.cpp', + 'artifacts.cpp', ) wrapper_deps = [ diff --git a/src/vulkan/wrapper/vk_entrypoints.py b/src/vulkan/wrapper/vk_entrypoints.py index 91a0651d33e..4435f59bf30 100644 --- a/src/vulkan/wrapper/vk_entrypoints.py +++ b/src/vulkan/wrapper/vk_entrypoints.py @@ -376,6 +376,9 @@ def _generate_trampoline(command, dispatch_table="device->dispatch_table"): handle_unwrap_logic[idx] = f" WLOGA(\"dispatch->{command.name}({', '.join(types)}) (id=%d)\", {', '.join([p.name for p in params])}, cmd_id);" handle_wrap_logic.append(f" WLOGA(\"dispatch->{command.name} {'returned %d' if command.return_type != 'void' else 'finished'} (id=%d)\"{', result' if command.return_type != 'void' else ''}, cmd_id);") + if params[0].type == 'VkQueue': + handle_unwrap_logic.append(" simple_mtx_lock(&base->resource_mutex);") + handle_wrap_logic = [" simple_mtx_unlock(&base->resource_mutex);"] + handle_wrap_logic return TRAMPOLINE_TEMPLATE.substitute( return_type=command.return_type, name=command.name, diff --git a/src/vulkan/wrapper/wrapper_debug.c b/src/vulkan/wrapper/wrapper_debug.c 
index 9e14dab1609..4b4722c6c5f 100644 --- a/src/vulkan/wrapper/wrapper_debug.c +++ b/src/vulkan/wrapper/wrapper_debug.c @@ -320,33 +320,7 @@ bool use_image_view_mode() { } bool use_compute_shader_mode() { - static bool initialized = false; - if (initialized) { - return g_use_compute_shader_mode; - } - initialized = true; - - bool use_image_view = use_image_view_mode(); - - char* env = getenv("USE_COMPUTE_SHADER"); - if (env) { - if (strcmp(env, "1") == 0) { - WLOG("Enabling experimental compute shader mode"); - g_use_compute_shader_mode = true; - } else if (strcmp(env, "0") == 0) { - WLOG("Disabling experimental compute shader mode"); - g_use_compute_shader_mode = false; - use_image_view = false; - g_use_image_view = false; - } - } - - if (use_image_view) { - g_use_compute_shader_mode = true; - return true; - } - - return g_use_compute_shader_mode; + return true; } diff --git a/src/vulkan/wrapper/wrapper_device.c b/src/vulkan/wrapper/wrapper_device.c index e23f5a14e5c..73343310ae1 100644 --- a/src/vulkan/wrapper/wrapper_device.c +++ b/src/vulkan/wrapper/wrapper_device.c @@ -18,6 +18,7 @@ #include "vk_unwrappers.h" #include "vk_printers.h" #include "spirv_edit.h" +#include "artifacts.h" #include "bcdec.h" @@ -335,7 +336,9 @@ WRAPPER_CreateDevice(VkPhysicalDevice physicalDevice, device->dispatch_handle); // Initialize the BCn interceptor states - bool use_image_view = use_image_view_mode(); + bool record_artifacts = CHECK_FLAG("RECORD_ARTIFACTS"); + bool use_image_view = use_image_view_mode() && !record_artifacts; + result = InterceptorState_Init(&device->s3tc, wrapper_device_to_handle(device), use_image_view ? 
sizeof(s3tc_iv_spv) : sizeof(s3tc_spv), @@ -809,18 +812,18 @@ WRAPPER_BindBufferMemory2( uint32_t bindInfoCount, const VkBindBufferMemoryInfo* pBindInfos) { - VK_FROM_HANDLE(wrapper_device, _device, device); + VK_FROM_HANDLE(wrapper_device, wdev, device); if (bindInfoCount == 0 || pBindInfos == NULL) { WLOGE("wrapper_BindBufferMemory2 called with no bind infos"); - return vk_error(&_device->vk, VK_ERROR_INVALID_EXTERNAL_HANDLE); + return vk_error(&wdev->vk, VK_ERROR_INVALID_EXTERNAL_HANDLE); } // Track all of the bindInfos for (uint32_t i = 0; i < bindInfoCount; i++) { - wrapper_buffer *_buffer = get_wrapper_buffer(_device, pBindInfos[i].buffer); + wrapper_buffer *_buffer = get_wrapper_buffer(wdev, pBindInfos[i].buffer); if (!_buffer) { - WLOGE("wrapper_BindBufferMemory2: buffer %p not tracked", pBindInfos[i].buffer); + WLOG("wrapper_BindBufferMemory2: buffer %p not tracked", pBindInfos[i].buffer); // return vk_error(&_device->vk, VK_ERROR_INVALID_EXTERNAL_HANDLE); // TODO(leegao): figure out what's going wrong here, but there are reports of this continue; @@ -885,7 +888,6 @@ static VkResult CreateConstantsUniformBuffer( static VkResult InterceptorState_Init(InterceptorState* state, VkDevice device, size_t spv_size, const uint32_t* spv_code, bool use_image_view, int bc_mode) { VkResult result; VK_FROM_HANDLE(wrapper_device, _device, device); - // 1. 
Create Descriptor Set Layout VkDescriptorSetLayoutBinding setLayoutBinding[3] = { { .binding = 0, @@ -1107,7 +1109,9 @@ static VkResult SubmitOneTimeCommands( void (*recordCommands)(struct wrapper_command_buffer*, void*), void* pUserData ) { - WLOG("Submitting one-time commands..."); + _Atomic static int counter = 0; + int id = counter++; + WLOGD("Submitting one-time commands for id=%d", id); VkResult result; VkDevice device = (VkDevice) _device; VkCommandBufferAllocateInfo allocInfo = { 0 }; @@ -1152,81 +1156,24 @@ static VkResult SubmitOneTimeCommands( return result; } - WLOG("Submitting command buffer to queue %p", queue); + WLOGD("Submitting command buffer to queue %p for id=%d", queue, id); result = WCHECK(QueueSubmit((VkQueue) queue, 1, &submitInfo, fence)); if (result != VK_SUCCESS) { return result; } - WLOG("Waiting for fence %p", fence); - WCHECK(WaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX)); + WLOGD("Waiting for fence %p for id=%d", fence, id); + result = WCHECK(WaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX)); if (result != VK_SUCCESS) { return result; } - WLOG("Command buffer execution completed"); + WLOGD("Command buffer execution completed for id=%d", id); WCHECKV(DestroyFence(device, fence, NULL)); WCHECKV(FreeCommandBuffers(device, commandPool, 1, &commandBuffer)); return VK_SUCCESS; } -struct CmdComputeShaderForDecompressionArgs { - struct wrapper_device* _device; - struct wrapper_image* wimg; - VkBuffer srcBuffer; - VkImage dstImage; - VkImageLayout dstImageLayout; - VkBuffer stagingBuffer; - const VkBufferImageCopy* region; - struct InterceptorState* state; -}; - -// void decode_bc6h_to_r16g16b16a16_sfloat(const void* compressedData, void* dstPixelBlock, int pitch, int isSigned) { -// // bcdec_bc6h_half decompresses to a 4x4 block of 3-component (RGB) half-floats. -// // We need a temporary buffer to store this intermediate result because the -// // output format (RGBA) has a different layout than the library's output (RGB). 
-// half_float temp_rgb_half_block[4][4][3]; - -// // The pitch for the temporary float buffer is the size of one row in bytes. -// // A row has 4 pixels, each with 3 half_float components. -// const int temp_pitch_bytes = 4 * 3 * sizeof(half_float); - -// // Step 1: Decompress the BC6h block into the temporary half-float buffer. -// // This is the most direct and efficient path for this target format. -// bcdec_bc6h_half(compressedData, temp_rgb_half_block, temp_pitch_bytes, isSigned); - -// // Step 2: Copy the RGB half-float data to the RGBA destination and add the Alpha channel. -// unsigned char* dst_row_bytes = (unsigned char*)dstPixelBlock; - -// // The bit representation of 1.0f in IEEE 754 half-precision format is 0x3C00. -// // This is used for the alpha channel, as BC6H is an RGB-only format. -// const half_float alpha_one = 0x3C00; - -// for (int y = 0; y < 4; ++y) { -// // Get a pointer to the start of the current pixel row in the destination. -// half_float* dst_pixel = (half_float*)dst_row_bytes; - -// for (int x = 0; x < 4; ++x) { -// // Get the RGB half values from the temporary buffer. -// const half_float r_half = temp_rgb_half_block[y][x][0]; -// const half_float g_half = temp_rgb_half_block[y][x][1]; -// const half_float b_half = temp_rgb_half_block[y][x][2]; - -// // Write the RGBA values to the destination. -// dst_pixel[0] = r_half; -// dst_pixel[1] = g_half; -// dst_pixel[2] = b_half; -// dst_pixel[3] = alpha_one; // Set alpha to 1.0f - -// // Move to the next pixel in the destination row (4 half_floats). -// dst_pixel += 4; -// } -// // Move to the next row in the destination buffer using the provided pitch. 
-// dst_row_bytes += pitch; -// } -// } - - static void BCnDecompression(VkFormat format, void* mappedSrcBase, void* mappedDst, @@ -1522,6 +1469,18 @@ static VkDeviceSize calculate_bc_copy_size(const VkBufferImageCopy* region, uint return offset_to_last_row + last_row_size_in_bytes; } +struct CmdComputeShaderForDecompressionArgs { + struct wrapper_device* _device; + struct wrapper_image* wimg; + VkBuffer srcBuffer; + VkImage dstImage; + VkImageLayout dstImageLayout; + VkBuffer stagingBuffer; + const VkBufferImageCopy* region; + struct InterceptorState* state; + bool use_image_view; +}; + static void CmdComputeShaderForDecompression( struct wrapper_command_buffer* _commandBuffer, struct CmdComputeShaderForDecompressionArgs* pArgs) @@ -1535,7 +1494,7 @@ static void CmdComputeShaderForDecompression( VkImage dstImage = wimg->dispatch_handle; struct InterceptorState* state = pArgs->state; VkCommandBuffer commandBuffer = _commandBuffer->dispatch_handle; - bool use_image_view = use_image_view_mode(); + bool use_image_view = pArgs->use_image_view; VkResult result; WLOG("CmdComputeShaderForDecompression: srcBuffer = %p, dstImage = %p", srcBuffer, dstImage); @@ -1841,13 +1800,14 @@ WRAPPER_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, } // --- Decompression Path --- - _Atomic static int count = 0; - count++; - WLOG("Emulating support for format=%d, count=%d", wimg->original_format, count); - + _Atomic static int counter = 0; + int decode_id = counter++; + WLOG("Emulating support for format=%d, decode_id=%d", wimg->original_format, decode_id); + + bool record_artifacts = CHECK_FLAG("RECORD_ARTIFACTS"); bool use_cpu_bcn = (get_host_decoding_bcn_masks() & (1 << (wimg->original_format - 131))) != 0; bool use_compute_shader = use_compute_shader_mode() && !use_cpu_bcn; - bool use_image_view = use_image_view_mode() && !use_cpu_bcn; + bool use_image_view = use_image_view_mode() && !use_cpu_bcn && !record_artifacts; // Check if the queues are the same struct 
wrapper_command_pool *pool = get_wrapper_command_pool(_device, wcb->pool); @@ -1907,6 +1867,7 @@ WRAPPER_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, .srcBuffer = srcBuffer, .region = region, .state = state, + .use_image_view = use_image_view, }; if (use_image_view) { @@ -1916,13 +1877,18 @@ WRAPPER_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, args.stagingBuffer = stagingBuffer; } - if (CHECK_FLAG("WRAPPER_ONE_BY_ONE")) { - SubmitOneTimeCommands( + if (CHECK_FLAG("WRAPPER_ONE_BY_ONE") || record_artifacts) { + WLOGD("Submitting decode_id %d", decode_id); + result = SubmitOneTimeCommands( _device, wcb->pool, _device->graphics_queue, (void (*)(struct wrapper_command_buffer*, void*)) &CmdComputeShaderForDecompression, &args); + if (result != VK_SUCCESS) { + WLOGE("GPU BCn decompression failed, expect visual glitches."); + return; + } } else { CmdComputeShaderForDecompression(wcb, &args); } @@ -1934,6 +1900,11 @@ WRAPPER_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, } } + if (record_artifacts) { + // Invariant: srcBuffer contains the BCn blocks, stagingBuffer contains the output + RecordBCnArtifacts(_device, region, srcBuffer, stagingBuffer, decode_id); + } + if (!use_image_view) { VkBufferMemoryBarrier bufferBarrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, diff --git a/src/vulkan/wrapper/wrapper_device_memory.c b/src/vulkan/wrapper/wrapper_device_memory.c index 7a4026d4bb8..bfc46f05633 100644 --- a/src/vulkan/wrapper/wrapper_device_memory.c +++ b/src/vulkan/wrapper/wrapper_device_memory.c @@ -295,6 +295,7 @@ wrapper_device_memory_from_handle(struct wrapper_device *device, return mem; } +// TODO: track all memory associated with host visible data WRAPPER_AllocateMemory(VkDevice _device, const VkMemoryAllocateInfo* pAllocateInfo, const VkAllocationCallbacks* pAllocator, diff --git a/src/vulkan/wrapper/wrapper_objects.h b/src/vulkan/wrapper/wrapper_objects.h index a694f0db8bc..dca6f52a68c 100644 --- a/src/vulkan/wrapper/wrapper_objects.h 
+++ b/src/vulkan/wrapper/wrapper_objects.h @@ -18,6 +18,10 @@ #include "wrapper_trampolines.h" +#ifdef __cplusplus +extern "C" { +#endif + struct wrapper_instance { struct vk_instance vk; @@ -126,15 +130,15 @@ VK_DEFINE_HANDLE_CASTS(wrapper_command_buffer, vk.base, VkCommandBuffer, VK_OBJECT_TYPE_COMMAND_BUFFER) struct wrapper_device_memory { - struct AHardwareBuffer *ahardware_buffer; - struct wrapper_device *device; - struct list_head link; - int dmabuf_fd; - void *map_address; - size_t map_size; - size_t alloc_size; - VkDeviceMemory dispatch_handle; - const VkAllocationCallbacks *alloc; + struct AHardwareBuffer *ahardware_buffer; + struct wrapper_device *device; + struct list_head link; + int dmabuf_fd; + void *map_address; + size_t map_size; + size_t alloc_size; + VkDeviceMemory dispatch_handle; + const VkAllocationCallbacks *alloc; }; VkResult enumerate_physical_device(struct vk_instance *_instance); @@ -232,3 +236,7 @@ struct wrapper_command_pool { }; MAKE_PROTOTYPES(wrapper_command_pool, VkCommandPool); + +#ifdef __cplusplus +} +#endif diff --git a/src/vulkan/wrapper/wrapper_private.h b/src/vulkan/wrapper/wrapper_private.h index 4e15ca41315..d9661730691 100644 --- a/src/vulkan/wrapper/wrapper_private.h +++ b/src/vulkan/wrapper/wrapper_private.h @@ -61,10 +61,6 @@ bool adrenotools_validate_gpu_mapping(void *handle); void adrenotools_set_turbo(bool turbo); -#ifdef __cplusplus -} -#endif - extern const struct vk_instance_extension_table wrapper_instance_extensions; extern const struct vk_device_extension_table wrapper_device_extensions; extern const struct vk_device_extension_table wrapper_filter_extensions; @@ -437,3 +433,7 @@ static void populate_bc6_decoding_constants(Bc6Constants* constants) { memcpy(constants->partition_table2, BC6_PARTITION_TABLE2_DATA, sizeof(BC6_PARTITION_TABLE2_DATA)); memcpy(constants->anchor_table2, BC6_ANCHOR_TABLE2_DATA, sizeof(BC6_ANCHOR_TABLE2_DATA)); } + +#ifdef __cplusplus +} +#endif From 
3a2c4943fd25ac2d942132cd64b92790215f6973 Mon Sep 17 00:00:00 2001 From: leegao Date: Wed, 13 Aug 2025 20:44:12 +1000 Subject: [PATCH 2/2] Add newer android setLayerPaths pattern (changed from const std::string to const std::string&) #126 --- src/vulkan/wrapper/graphics_env_hooks.cpp | 29 ++++++++++++++++++----- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/src/vulkan/wrapper/graphics_env_hooks.cpp b/src/vulkan/wrapper/graphics_env_hooks.cpp index e383da72bb9..7f7462755f5 100644 --- a/src/vulkan/wrapper/graphics_env_hooks.cpp +++ b/src/vulkan/wrapper/graphics_env_hooks.cpp @@ -23,16 +23,29 @@ bool set_layer_paths() { LAYER_ERROR("Cannot open libgraphicsenv.so"); } - #define FIND(var, sig, name) auto var = sig (dlsym(handle, name)); \ + #define FIND(var, sig, name) var = sig (dlsym(handle, name)); \ if (!var) { \ LAYER_ERROR("Cannot find symbol in libgraphicsenv.so: " #name); \ } \ WLOGD("Found " #name " in libgraphicsenv.so at %p", var); - FIND(getInstance, (void* (*)()), "_ZN7android11GraphicsEnv11getInstanceEv"); - FIND(getLayerPaths, (const std::string&(*)(void*)), "_ZN7android11GraphicsEnv13getLayerPathsEv"); - FIND(getAppNamespace, (void* (*)(void*)), "_ZN7android11GraphicsEnv15getAppNamespaceEv"); - FIND(setLayerPaths, (void (*)(void*, void*, const std::string)), "_ZN7android11GraphicsEnv13setLayerPathsEPNS_21NativeLoaderNamespaceENSt3__112basic_stringIcNS3_11char_traitsIcEENS3_9allocatorIcEEEE"); + #define FIND2(var, sig, name) var = sig (dlsym(handle, name)); \ + if (!var) { \ + WLOGD("Cannot find " #name " in libgraphicsenv.so"); \ + } else { \ + WLOGD("Found " #name " in libgraphicsenv.so at %p", var); \ + } + + auto FIND(getInstance, (void* (*)()), "_ZN7android11GraphicsEnv11getInstanceEv"); + auto FIND(getLayerPaths, (const std::string&(*)(void*)), "_ZN7android11GraphicsEnv13getLayerPathsEv"); + auto FIND(getAppNamespace, (void* (*)(void*)), "_ZN7android11GraphicsEnv15getAppNamespaceEv"); + auto FIND2(setLayerPaths, (void (*)(void*, 
void*, const std::string)), "_ZN7android11GraphicsEnv13setLayerPathsEPNS_21NativeLoaderNamespaceENSt3__112basic_stringIcNS3_11char_traitsIcEENS3_9allocatorIcEEEE"); + // Later versions of Android uses this signature instead + auto FIND2(setLayerPaths2, (void (*)(void*, void*, const std::string&)), "_ZN7android11GraphicsEnv13setLayerPathsEPNS_21NativeLoaderNamespaceERKNSt3__112basic_stringIcNS3_11char_traitsIcEENS3_9allocatorIcEEEE"); + + if (!setLayerPaths && !setLayerPaths2) { + LAYER_ERROR("Could not find android::GraphicsEnv::setLayerPaths(android::NativeLoaderNamespace*, const std::string[&])"); + } void* instance = getInstance(); if (!instance) { @@ -49,7 +62,11 @@ bool set_layer_paths() { LAYER_ERROR("GraphicsEnv::mLayerPaths is already set to %s, cannot perform hijacking", path.c_str()); } void* app_namespace = getAppNamespace(instance); - setLayerPaths(instance, app_namespace, PATH); + if (setLayerPaths) { + setLayerPaths(instance, app_namespace, PATH); + } else { + setLayerPaths2(instance, app_namespace, PATH); + } path = getLayerPaths(instance); if (path != PATH) { LAYER_ERROR("GraphicsEnv::mLayerPaths failed to be set correctly, found %s", path.c_str());