From 00ac884a62429ec7b7275449b4f746bbe225ab49 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Fri, 30 May 2025 13:30:29 +0000
Subject: [PATCH 1/5] feat: Add SIMD pixel swizzling with tests and fuzzers

This commit introduces SIMD (AVX2, SSSE3, NEON) optimizations for pixel format conversion and swizzling operations, along with corresponding unit tests and fuzz targets.

Optimizations Implemented:
- BGR (24-bit) to BGRA (32-bit, Alpha=255) conversion in `CreateMatrixFromBitmap`.
- BGRA to RGBA swizzle (e.g., for `BmpTool::load`).
- RGBA to BGRA swizzle (e.g., for `BmpTool::save`).
- Corrected Alpha handling for 24-bit BGR sources to set Alpha to 255 (opaque).

Refactoring:
- The core SIMD and scalar logic for these operations has been refactored into helper functions:
  - `internal_convert_bgr_to_bgra_simd`
  - `internal_swizzle_bgra_to_rgba_simd`
  - `internal_swizzle_rgba_to_bgra_simd`
- These helpers are declared in `src/bitmap/bitmap.h` and `src/format/format_internal_helpers.hpp` for testability.

Testing:
- Added new Google Tests in `tests/test_swizzle.cpp` for all three helper functions, covering various input sizes, edge cases (0 pixels), and content verification.
- Added new fuzz targets in `tests/fuzz/` for each helper function to detect crashes and sanitizer issues with random inputs:
  - `fuzz_convert_bgr_to_bgra.cpp`
  - `fuzz_swizzle_bgra_to_rgba.cpp`
  - `fuzz_swizzle_rgba_to_bgra.cpp`
- Updated `tests/CMakeLists.txt` to integrate the new unit tests and fuzz targets into the build system.
---
 src/bitmap/bitmap.cpp                    |  89 ++++++++++++--
 src/format/bitmap.cpp                    | 143 +++++++++++++++++++---
 src/format/format_internal_helpers.hpp   |  11 ++
 src/simd_utils.hpp                       |  17 +++
 tests/CMakeLists.txt                     |  19 +++
 tests/fuzz/fuzz_convert_bgr_to_bgra.cpp  |  34 ++++++
 tests/fuzz/fuzz_swizzle_bgra_to_rgba.cpp |  37 ++++++
 tests/fuzz/fuzz_swizzle_rgba_to_bgra.cpp |  36 ++++++
 tests/test_swizzle.cpp                   | 149 +++++++++++++++++++++++
 9 files changed, 506 insertions(+), 29 deletions(-)
 create mode 100644 src/format/format_internal_helpers.hpp
 create mode 100644 src/simd_utils.hpp
 create mode 100644 tests/fuzz/fuzz_convert_bgr_to_bgra.cpp
 create mode 100644 tests/fuzz/fuzz_swizzle_bgra_to_rgba.cpp
 create mode 100644 tests/fuzz/fuzz_swizzle_rgba_to_bgra.cpp
 create mode 100644 tests/test_swizzle.cpp
diff --git a/src/bitmap/bitmap.cpp b/src/bitmap/bitmap.cpp
index 9a46a3b..f248681 100644
--- a/src/bitmap/bitmap.cpp
+++ b/src/bitmap/bitmap.cpp
@@ -2,6 +2,8 @@
 #include <iostream>  // For standard I/O (though not explicitly used in this file's current state).
 #include <algorithm> // For std::min and std::max, used in ApplyBoxBlur and color adjustments.
 #include <vector>    // For std::vector, used by Matrix class and underlying bitmap data.
+#include <cstring>   // For std::memcpy, used in SIMD NEON path
+#include "../simd_utils.hpp" // Added include
 
 // Define BI_RGB as 0 if not already defined, to ensure cross-platform compatibility for bitmap compression type.
 #ifndef BI_RGB
@@ -183,6 +185,73 @@ Bitmap::File ApplyBoxBlur(Bitmap::File bitmapFile, int blurRadius)
     return CreateBitmapFromMatrix(blurredMatrix);
 }
 
+// Helper function for BGR to BGRA conversion with SIMD (declaration in bitmap.h)
+void internal_convert_bgr_to_bgra_simd(const uint8_t* src_row_bgr_ptr, ::Pixel* dest_row_pixel_ptr, size_t num_pixels_in_row) {
+    size_t current_src_byte_offset = 0;
+    size_t current_dest_pixel_idx = 0;
+    size_t num_pixels_to_process = num_pixels_in_row;
+
+#if defined(__AVX2__)
+    // As per previous implementation, AVX2 uses SSSE3 logic.
+    // No distinct 256-bit AVX2 path implemented here.
+#endif
+
+#if defined(__AVX2__) || defined(__SSSE3__) // Use SSSE3 for AVX2 as well if no specific AVX2 code
+    const size_t pixels_per_step = 4; 
+    __m128i bgr_to_bgrX_mask = _mm_setr_epi8(
+        0, 1, 2, (char)0x80, 
+        3, 4, 5, (char)0x80, 
+        6, 7, 8, (char)0x80, 
+        9, 10, 11, (char)0x80 
+    );
+    __m128i alpha_channel_ff = _mm_setr_epi8(
+        0,0,0, (char)0xFF, 0,0,0,(char)0xFF, 0,0,0,(char)0xFF, 0,0,0,(char)0xFF
+    );
+
+    while (num_pixels_to_process >= pixels_per_step) {
+        __m128i bgr_data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src_row_bgr_ptr + current_src_byte_offset));
+        __m128i bgra_pixels_expanded = _mm_shuffle_epi8(bgr_data, bgr_to_bgrX_mask);
+        __m128i bgra_pixels_final = _mm_or_si128(bgra_pixels_expanded, alpha_channel_ff);
+        _mm_storeu_si128(reinterpret_cast<__m128i*>(dest_row_pixel_ptr + current_dest_pixel_idx), bgra_pixels_final);
+        current_src_byte_offset += pixels_per_step * 3; 
+        current_dest_pixel_idx += pixels_per_step;    
+        num_pixels_to_process -= pixels_per_step;
+    }
+#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
+    const size_t pixels_per_step = 4;
+    const uint8_t table_bgr_to_bgr0[] = {0,1,2,16, 3,4,5,16, 6,7,8,16, 9,10,11,16}; 
+    uint8x16_t neon_shuffle_table = vld1q_u8(table_bgr_to_bgr0);
+    const uint8_t alpha_bytes[] = {0,0,0,0xFF, 0,0,0,0xFF, 0,0,0,0xFF, 0,0,0,0xFF};
+    uint8x16_t alpha_channel_ff_neon = vld1q_u8(alpha_bytes);
+    uint8_t temp_bgr_load[16];
+
+    while (num_pixels_to_process >= pixels_per_step) {
+        std::memcpy(temp_bgr_load, src_row_bgr_ptr + current_src_byte_offset, 12);
+        uint8x16_t bgr_data_loaded = vld1q_u8(temp_bgr_load);
+        uint8x16_t bgra_expanded = vqtbl1q_u8(bgr_data_loaded, neon_shuffle_table);
+        uint8x16_t bgra_final = vorrq_u8(bgra_expanded, alpha_channel_ff_neon);
+        vst1q_u8(reinterpret_cast<uint8_t*>(dest_row_pixel_ptr + current_dest_pixel_idx), bgra_final);
+        current_src_byte_offset += pixels_per_step * 3;
+        current_dest_pixel_idx += pixels_per_step;
+        num_pixels_to_process -= pixels_per_step;
+    }
+#else 
+    // This #else block ensures that if no SIMD path is taken (e.g. SSSE3/NEON not defined, or AVX2 defined but its specific block is empty and it's not grouped with SSSE3),
+    // the scalar loop below is the ONLY path for processing.
+    // The current structure with #if defined(__AVX2__) || defined(__SSSE3__) followed by #elif defined(__ARM_NEON)
+    // means this #else is for when NEITHER of those are true.
+    // If AVX2 is defined, it uses the SSSE3 block. If only NEON is defined, it uses NEON block.
+    // If none are defined, it falls to the scalar loop below.
+#endif
+    // Scalar fallback for remaining pixels OR if no SIMD defined/executed above
+    for (size_t k_rem = 0; k_rem < num_pixels_to_process; ++k_rem) {
+        (dest_row_pixel_ptr + current_dest_pixel_idx + k_rem)->blue  = *(src_row_bgr_ptr + current_src_byte_offset + k_rem * 3 + 0);
+        (dest_row_pixel_ptr + current_dest_pixel_idx + k_rem)->green = *(src_row_bgr_ptr + current_src_byte_offset + k_rem * 3 + 1);
+        (dest_row_pixel_ptr + current_dest_pixel_idx + k_rem)->red   = *(src_row_bgr_ptr + current_src_byte_offset + k_rem * 3 + 2);
+        (dest_row_pixel_ptr + current_dest_pixel_idx + k_rem)->alpha = 255;
+    }
+}
+
 // Converts a Bitmap::File object (containing raw bitmap data and headers)
 // into a Matrix::Matrix<Pixel> for easier pixel manipulation.
 Matrix::Matrix<Pixel> CreateMatrixFromBitmap(Bitmap::File bitmapFile)
@@ -236,16 +305,16 @@ Matrix::Matrix<Pixel> CreateMatrixFromBitmap(Bitmap::File bitmapFile)
     }
     else if (bitmapFile.bitmapInfoHeader.biBitCount == 24) // For 24-bit bitmaps (BGR)
     {
-        int k = 0; // Index for bitmapFile.bitmapData
-        for (int i = 0; i < imageMatrix.rows(); i++)
-            for (int j = 0; j < imageMatrix.cols(); j++)
-            {
-                imageMatrix[i][j].blue = bitmapFile.bitmapData[k];
-                imageMatrix[i][j].green = bitmapFile.bitmapData[k + 1];
-                imageMatrix[i][j].red = bitmapFile.bitmapData[k + 2];
-                imageMatrix[i][j].alpha = 0; // Default alpha to 0 (opaque) for 24-bit images.
-                k += 3; // Move to the next pixel (3 bytes)
-            }
+        // Calculate padding if necessary, though source data in bitmapFile.bitmapData should be packed according to BMP spec (rows padded to 4 bytes)
+        // However, we process pixel by pixel here from the linear bitmapData.
+        // The number of bytes per row in source data:
+        uint32_t src_bytes_per_row = (static_cast<uint32_t>(imageMatrix.cols()) * 3 + 3) & ~3u; // BMP rows are padded to 4 bytes for BGR
+
+        for (int i = 0; i < imageMatrix.rows(); i++) {
+            const uint8_t* src_row_bgr_ptr = bitmapFile.bitmapData.data() + (static_cast<size_t>(i) * src_bytes_per_row);
+            ::Pixel* dest_row_pixel_ptr = &imageMatrix[i][0];
+            internal_convert_bgr_to_bgra_simd(src_row_bgr_ptr, dest_row_pixel_ptr, imageMatrix.cols());
+        }
     }
     // Note: Other bit depths (e.g., 1, 4, 8, 16-bit) would require more complex handling.
 
diff --git a/src/format/bitmap.cpp b/src/format/bitmap.cpp
index f28e93f..1c0eb06 100644
--- a/src/format/bitmap.cpp
+++ b/src/format/bitmap.cpp
@@ -12,6 +12,8 @@
 #include "../../src/bitmapfile/bitmap_file.h" // For BITMAPFILEHEADER, BITMAPINFOHEADER from external lib
 #include "../../src/bitmap/bitmap.h"         // For ::Pixel, ::CreateMatrixFromBitmap, ::CreateBitmapFromMatrix
 #include "../../src/matrix/matrix.h"         // For Matrix::Matrix
+#include "../simd_utils.hpp" // Added include
+#include "format_internal_helpers.hpp" // Added include for the new helpers
 
 // Define constants for BMP format (can be used by Format::Internal helpers or if save needs them directly)
 constexpr uint16_t BMP_MAGIC_TYPE_CONST = 0x4D42; // 'BM'
@@ -150,20 +152,72 @@ Result<Bitmap, BitmapError> load(std::span<const uint8_t> bmp_data) {
     bmp_out.bpp = 32; 
     bmp_out.data.resize(static_cast<size_t>(bmp_out.w) * bmp_out.h * 4);
 
+    // The loop converting image_matrix to bmp_out.data
+    // bmp_out.data is already resized.
     for (uint32_t y = 0; y < bmp_out.h; ++y) {
-        for (uint32_t x = 0; x < bmp_out.w; ++x) {
-            const ::Pixel& src_pixel = image_matrix.at(y, x); // Changed Get(x,y) to at(y,x)
-            
-            size_t dest_idx = (static_cast<size_t>(y) * bmp_out.w + x) * 4;
-            bmp_out.data[dest_idx + 0] = src_pixel.red;   
-            bmp_out.data[dest_idx + 1] = src_pixel.green; 
-            bmp_out.data[dest_idx + 2] = src_pixel.blue;  
-            bmp_out.data[dest_idx + 3] = src_pixel.alpha; 
-        }
+        const ::Pixel* src_bgra_pixels_row = &image_matrix[y][0];
+        uint8_t* dest_rgba_data_row = bmp_out.data.data() + (static_cast<size_t>(y) * bmp_out.w * 4);
+        internal_swizzle_bgra_to_rgba_simd(src_bgra_pixels_row, dest_rgba_data_row, bmp_out.w);
     }
     return bmp_out;
 }
 
+
+// Helper function to convert an array of ::Pixel (BGRA order) to an array of uint8_t (RGBA order)
+void internal_swizzle_bgra_to_rgba_simd(const ::Pixel* src_bgra_pixels, uint8_t* dest_rgba_data, size_t num_pixels) {
+    size_t current_pixel_idx = 0;
+    size_t num_pixels_to_process = num_pixels;
+
+#if defined(__AVX2__)
+    const size_t pixels_per_step = 8; // 8 pixels = 32 bytes
+    __m256i shuffle_mask_bgra_to_rgba = _mm256_setr_epi8(
+        2, 1, 0, 3,  6, 5, 4, 7,  10, 9, 8, 11,  14,13,12,15, // First 4 pixels (16 bytes)
+        18,17,16,19, 22,21,20,23, 26,25,24,27, 30,29,28,31  // Next 4 pixels (16 bytes)
+    );
+    while (num_pixels_to_process >= pixels_per_step) {
+        __m256i bgra_pixels_loaded = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(src_bgra_pixels + current_pixel_idx));
+        __m256i rgba_pixels = _mm256_shuffle_epi8(bgra_pixels_loaded, shuffle_mask_bgra_to_rgba);
+        _mm256_storeu_si256(reinterpret_cast<__m256i*>(dest_rgba_data + current_pixel_idx * 4), rgba_pixels);
+        current_pixel_idx += pixels_per_step;
+        num_pixels_to_process -= pixels_per_step;
+    }
+#elif defined(__SSSE3__) // _mm_shuffle_epi8 requires SSSE3
+    const size_t pixels_per_step = 4; // 4 pixels = 16 bytes
+    __m128i shuffle_mask_bgra_to_rgba = _mm_setr_epi8(
+        2, 1, 0, 3,  6, 5, 4, 7,  10, 9, 8, 11,  14,13,12,15
+    );
+    while (num_pixels_to_process >= pixels_per_step) {
+        __m128i bgra_pixels_loaded = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src_bgra_pixels + current_pixel_idx));
+        __m128i rgba_pixels = _mm_shuffle_epi8(bgra_pixels_loaded, shuffle_mask_bgra_to_rgba);
+        _mm_storeu_si128(reinterpret_cast<__m128i*>(dest_rgba_data + current_pixel_idx * 4), rgba_pixels);
+        current_pixel_idx += pixels_per_step;
+        num_pixels_to_process -= pixels_per_step;
+    }
+#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
+    const size_t pixels_per_step = 4; // 4 pixels = 16 bytes using uint8x16_t
+    const uint8_t shuffle_coeffs_array[] = {2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15};
+    uint8x16_t neon_shuffle_mask = vld1q_u8(shuffle_coeffs_array);
+
+    while (num_pixels_to_process >= pixels_per_step) {
+        uint8x16_t bgra_pixels_loaded = vld1q_u8(reinterpret_cast<const uint8_t*>(src_bgra_pixels + current_pixel_idx));
+        uint8x16_t rgba_pixels = vqtbl1q_u8(bgra_pixels_loaded, neon_shuffle_mask);
+        vst1q_u8(dest_rgba_data + current_pixel_idx * 4, rgba_pixels);
+        current_pixel_idx += pixels_per_step;
+        num_pixels_to_process -= pixels_per_step;
+    }
+#endif
+    // Scalar fallback for remaining pixels in the row
+    for (size_t i = 0; i < num_pixels_to_process; ++i) {
+        const ::Pixel& src_pixel = *(src_bgra_pixels + current_pixel_idx + i);
+        uint8_t* dest_pixel_ptr = dest_rgba_data + (current_pixel_idx + i) * 4;
+        dest_pixel_ptr[0] = src_pixel.red;   // R
+        dest_pixel_ptr[1] = src_pixel.green; // G
+        dest_pixel_ptr[2] = src_pixel.blue;  // B
+        dest_pixel_ptr[3] = src_pixel.alpha; // A
+    }
+}
+
+
 // The NEW BmpTool::save function using the external library
 Result<void, BitmapError> save(const Bitmap& bitmap_in, std::span<uint8_t> out_bmp_buffer) {
     // 1. Input Validation from BmpTool::Bitmap
@@ -185,17 +239,12 @@ Result<void, BitmapError> save(const Bitmap& bitmap_in, std::span<uint8_t> out_b
     // Assuming ::Pixel struct has members .red, .green, .blue, .alpha
     Matrix::Matrix<::Pixel> image_matrix(bitmap_in.h, bitmap_in.w); // Changed order to (rows, cols)
 
+    // The loop converting bitmap_in.data to image_matrix
+    // image_matrix is already sized.
     for (uint32_t y = 0; y < bitmap_in.h; ++y) {
-        for (uint32_t x = 0; x < bitmap_in.w; ++x) {
-            const uint8_t* src_pixel_ptr = &bitmap_in.data[(static_cast<size_t>(y) * bitmap_in.w + x) * 4]; // RGBA
-            ::Pixel dest_pixel; 
-            dest_pixel.red   = src_pixel_ptr[0];
-            dest_pixel.green = src_pixel_ptr[1];
-            dest_pixel.blue  = src_pixel_ptr[2];
-            dest_pixel.alpha = src_pixel_ptr[3];
-            
-            image_matrix.at(y, x) = dest_pixel; // Changed Set(x,y) to at(y,x)
-        }
+        const uint8_t* src_rgba_data_row = &bitmap_in.data[(static_cast<size_t>(y) * bitmap_in.w * 4)];
+        ::Pixel* dest_bgra_pixels_row = &image_matrix[y][0];
+        internal_swizzle_rgba_to_bgra_simd(src_rgba_data_row, dest_bgra_pixels_row, bitmap_in.w);
     }
 
     // 3. Convert Matrix<::Pixel> to ::Bitmap::File
@@ -255,4 +304,60 @@ Result<void, BitmapError> save(const Bitmap& bitmap_in, std::span<uint8_t> out_b
     return BmpTool::Success{}; // This will implicitly convert to Result<void, BitmapError>(Success{})
 }
 
+
+// Helper function to convert an array of uint8_t (RGBA order) to an array of ::Pixel (BGRA order)
+void internal_swizzle_rgba_to_bgra_simd(const uint8_t* src_rgba_data, ::Pixel* dest_bgra_pixels, size_t num_pixels) {
+    size_t current_pixel_idx = 0;
+    size_t num_pixels_to_process = num_pixels;
+
+#if defined(__AVX2__)
+    const size_t pixels_per_step = 8; // 8 pixels = 32 bytes
+    __m256i shuffle_mask_rgba_to_bgra = _mm256_setr_epi8(
+        2, 1, 0, 3,  6, 5, 4, 7,  10, 9, 8, 11,  14,13,12,15,
+        18,17,16,19, 22,21,20,23, 26,25,24,27, 30,29,28,31
+    );
+    while (num_pixels_to_process >= pixels_per_step) {
+        __m256i rgba_pixels_loaded = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(src_rgba_data + current_pixel_idx * 4));
+        __m256i bgra_pixels = _mm256_shuffle_epi8(rgba_pixels_loaded, shuffle_mask_rgba_to_bgra);
+        _mm256_storeu_si256(reinterpret_cast<__m256i*>(dest_bgra_pixels + current_pixel_idx), bgra_pixels);
+        current_pixel_idx += pixels_per_step;
+        num_pixels_to_process -= pixels_per_step;
+    }
+#elif defined(__SSSE3__) // _mm_shuffle_epi8 requires SSSE3
+    const size_t pixels_per_step = 4; // 4 pixels = 16 bytes
+    __m128i shuffle_mask_rgba_to_bgra = _mm_setr_epi8(
+        2, 1, 0, 3,  6, 5, 4, 7,  10, 9, 8, 11,  14,13,12,15
+    );
+    while (num_pixels_to_process >= pixels_per_step) {
+        __m128i rgba_pixels_loaded = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src_rgba_data + current_pixel_idx * 4));
+        __m128i bgra_pixels = _mm_shuffle_epi8(rgba_pixels_loaded, shuffle_mask_rgba_to_bgra);
+        _mm_storeu_si128(reinterpret_cast<__m128i*>(dest_bgra_pixels + current_pixel_idx), bgra_pixels);
+        current_pixel_idx += pixels_per_step;
+        num_pixels_to_process -= pixels_per_step;
+    }
+#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
+    const size_t pixels_per_step = 4; // 4 pixels = 16 bytes
+    const uint8_t shuffle_coeffs_array[] = {2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15};
+    uint8x16_t neon_shuffle_mask = vld1q_u8(shuffle_coeffs_array);
+
+    while (num_pixels_to_process >= pixels_per_step) {
+        uint8x16_t rgba_pixels_loaded = vld1q_u8(src_rgba_data + current_pixel_idx * 4);
+        uint8x16_t bgra_pixels = vqtbl1q_u8(rgba_pixels_loaded, neon_shuffle_mask);
+        vst1q_u8(reinterpret_cast<uint8_t*>(dest_bgra_pixels + current_pixel_idx), bgra_pixels);
+        current_pixel_idx += pixels_per_step;
+        num_pixels_to_process -= pixels_per_step;
+    }
+#endif
+    // Scalar fallback for remaining pixels in the row
+    for (size_t i = 0; i < num_pixels_to_process; ++i) {
+        const uint8_t* src_pixel_ptr = src_rgba_data + (current_pixel_idx + i) * 4;
+        ::Pixel& dest_pixel = *(dest_bgra_pixels + current_pixel_idx + i);
+        
+        dest_pixel.red   = src_pixel_ptr[0]; // R
+        dest_pixel.green = src_pixel_ptr[1]; // G
+        dest_pixel.blue  = src_pixel_ptr[2]; // B
+        dest_pixel.alpha = src_pixel_ptr[3]; // A
+    }
+}
+
 } // namespace BmpTool
diff --git a/src/format/format_internal_helpers.hpp b/src/format/format_internal_helpers.hpp
new file mode 100644
index 0000000..450da31
--- /dev/null
+++ b/src/format/format_internal_helpers.hpp
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <cstddef> // For size_t
+// Include bitmap.h for the definition of ::Pixel
+#include "../bitmap/bitmap.h" 
+
+// Helper function to convert an array of ::Pixel (BGRA order) to an array of uint8_t (RGBA order)
+void internal_swizzle_bgra_to_rgba_simd(const ::Pixel* src_bgra_pixels, uint8_t* dest_rgba_data, size_t num_pixels);
+
+// Helper function to convert an array of uint8_t (RGBA order) to an array of ::Pixel (BGRA order)
+void internal_swizzle_rgba_to_bgra_simd(const uint8_t* src_rgba_data, ::Pixel* dest_bgra_pixels, size_t num_pixels);
diff --git a/src/simd_utils.hpp b/src/simd_utils.hpp
new file mode 100644
index 0000000..f2fe293
--- /dev/null
+++ b/src/simd_utils.hpp
@@ -0,0 +1,17 @@
+#pragma once
+
+// For x86/x64 SIMD intrinsics
+#if defined(__SSE2__) || defined(__AVX__) || defined(__AVX2__)
+#include <immintrin.h> // Includes SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, FMA, etc.
+#endif
+
+// For ARM NEON intrinsics
+#if defined(__ARM_NEON) || defined(__ARM_NEON__)
+#include <arm_neon.h>
+#endif
+
+// Helper to check if a pointer is aligned to a certain byte boundary
+template <typename T>
+inline bool is_aligned(const T* ptr, std::size_t alignment) {
+    return reinterpret_cast<std::uintptr_t>(ptr) % alignment == 0;
+}
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 246d741..c5c431b 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -9,6 +9,7 @@ set(TEST_SOURCES
     test_bitmap.cpp
     test_bitmap_file.cpp
     test_matrix.cpp
+    test_swizzle.cpp # <-- Add this line
     tests_main.cpp
     api_roundtrip.cpp
 )
@@ -60,6 +61,24 @@ if(ENABLE_FUZZING AND CMAKE_CXX_COMPILER_ID MATCHES "Clang")
   target_link_libraries(fuzz_matrix PRIVATE bitmap)
   set_target_properties(fuzz_matrix PROPERTIES LINK_FLAGS "${FUZZ_LINK_FLAGS}")
   message(STATUS "Fuzz target fuzz_matrix added.")
+
+  # Fuzzer for BGR to BGRA conversion
+  add_executable(fuzz_convert_bgr_to_bgra fuzz/fuzz_convert_bgr_to_bgra.cpp)
+  target_link_libraries(fuzz_convert_bgr_to_bgra PRIVATE bitmap)
+  set_target_properties(fuzz_convert_bgr_to_bgra PROPERTIES LINK_FLAGS "${FUZZ_LINK_FLAGS}")
+  message(STATUS "Fuzz target fuzz_convert_bgr_to_bgra added.")
+
+  # Fuzzer for BGRA to RGBA swizzle
+  add_executable(fuzz_swizzle_bgra_to_rgba fuzz/fuzz_swizzle_bgra_to_rgba.cpp)
+  target_link_libraries(fuzz_swizzle_bgra_to_rgba PRIVATE bitmap)
+  set_target_properties(fuzz_swizzle_bgra_to_rgba PROPERTIES LINK_FLAGS "${FUZZ_LINK_FLAGS}")
+  message(STATUS "Fuzz target fuzz_swizzle_bgra_to_rgba added.")
+
+  # Fuzzer for RGBA to BGRA swizzle
+  add_executable(fuzz_swizzle_rgba_to_bgra fuzz/fuzz_swizzle_rgba_to_bgra.cpp)
+  target_link_libraries(fuzz_swizzle_rgba_to_bgra PRIVATE bitmap)
+  set_target_properties(fuzz_swizzle_rgba_to_bgra PROPERTIES LINK_FLAGS "${FUZZ_LINK_FLAGS}")
+  message(STATUS "Fuzz target fuzz_swizzle_rgba_to_bgra added.")
   
 endif()
 # Note: The public include directories from the 'bitmap' target 
diff --git a/tests/fuzz/fuzz_convert_bgr_to_bgra.cpp b/tests/fuzz/fuzz_convert_bgr_to_bgra.cpp
new file mode 100644
index 0000000..afe57a3
--- /dev/null
+++ b/tests/fuzz/fuzz_convert_bgr_to_bgra.cpp
@@ -0,0 +1,34 @@
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+#include "../src/bitmap/bitmap.h" // For ::Pixel and internal_convert_bgr_to_bgra_simd
+
+// Limit max pixels to prevent excessive memory allocation during fuzzing.
+const size_t MAX_FUZZ_PIXELS = 1024 * 1024; // Approx 4MB for BGRA, 3MB for BGR
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+    if (Size < 3) { // Need at least one BGR pixel
+        return 0;
+    }
+
+    size_t num_pixels = Size / 3;
+    if (num_pixels == 0) {
+        return 0;
+    }
+    if (num_pixels > MAX_FUZZ_PIXELS) {
+        num_pixels = MAX_FUZZ_PIXELS;
+        // Adjust Size to match the truncated num_pixels for the source data,
+        // though internal_convert_bgr_to_bgra_simd only reads num_pixels * 3 bytes.
+        // No, Data pointer is const, and Size is the original size.
+        // The function will just process fewer pixels from the original Data buffer.
+    }
+
+    std::vector<::Pixel> output_pixels(num_pixels);
+    // The source data (Data) is used directly.
+    // internal_convert_bgr_to_bgra_simd will read num_pixels * 3 bytes from Data.
+    // This is safe because num_pixels was derived from Size/3.
+    // If num_pixels was capped by MAX_FUZZ_PIXELS, it reads a sub-segment of the original data.
+    internal_convert_bgr_to_bgra_simd(Data, output_pixels.data(), num_pixels);
+    
+    return 0; // Non-zero return values are reserved for future use.
+}
diff --git a/tests/fuzz/fuzz_swizzle_bgra_to_rgba.cpp b/tests/fuzz/fuzz_swizzle_bgra_to_rgba.cpp
new file mode 100644
index 0000000..d14007b
--- /dev/null
+++ b/tests/fuzz/fuzz_swizzle_bgra_to_rgba.cpp
@@ -0,0 +1,37 @@
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+#include "../src/bitmap/bitmap.h" // For ::Pixel
+#include "../src/format/format_internal_helpers.hpp" // For internal_swizzle_bgra_to_rgba_simd
+
+const size_t MAX_FUZZ_PIXELS = 1024 * 1024;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+    if (Size < sizeof(::Pixel)) { // Need at least one ::Pixel
+        return 0;
+    }
+    // Ensure Size is a multiple of sizeof(::Pixel)
+    // If not, we can't safely cast Data to ::Pixel* for the full Size.
+    // The function expects a certain number of full Pixel structs.
+    if (Size % sizeof(::Pixel) != 0) {
+        // Or alternatively, could calculate num_pixels based on floor(Size / sizeof(::Pixel))
+        // and only pass that many. For stricter fuzzing, returning if not aligned might be intended.
+        return 0; 
+    }
+
+    size_t num_pixels = Size / sizeof(::Pixel);
+    if (num_pixels == 0) { // Should be caught by Size < sizeof(::Pixel) already
+        return 0;
+    }
+    if (num_pixels > MAX_FUZZ_PIXELS) {
+        num_pixels = MAX_FUZZ_PIXELS;
+    }
+    
+    std::vector<uint8_t> rgba_output(num_pixels * 4);
+    // Data is cast to const ::Pixel*. The function will read num_pixels from this array.
+    // This is safe because num_pixels was derived from Size / sizeof(::Pixel).
+    // If num_pixels was capped, it processes a sub-segment.
+    internal_swizzle_bgra_to_rgba_simd(reinterpret_cast<const ::Pixel*>(Data), rgba_output.data(), num_pixels);
+    
+    return 0;
+}
diff --git a/tests/fuzz/fuzz_swizzle_rgba_to_bgra.cpp b/tests/fuzz/fuzz_swizzle_rgba_to_bgra.cpp
new file mode 100644
index 0000000..5e0b9a5
--- /dev/null
+++ b/tests/fuzz/fuzz_swizzle_rgba_to_bgra.cpp
@@ -0,0 +1,36 @@
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+#include "../src/bitmap/bitmap.h" // For ::Pixel
+#include "../src/format/format_internal_helpers.hpp" // For internal_swizzle_rgba_to_bgra_simd
+
+const size_t MAX_FUZZ_PIXELS = 1024 * 1024;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+    if (Size < 4) { // Need at least one RGBA pixel (4 bytes)
+        return 0;
+    }
+    // Ensure Size is a multiple of 4 for RGBA pixels
+    if (Size % 4 != 0) {
+        // Similar to the BGRA->RGBA fuzzer, could truncate or return.
+        // Returning if not perfectly divisible ensures strict input format.
+        return 0;
+    }
+
+    size_t num_pixels = Size / 4;
+    if (num_pixels == 0) { // Should be caught by Size < 4 already
+        return 0;
+    }
+    if (num_pixels > MAX_FUZZ_PIXELS) {
+        num_pixels = MAX_FUZZ_PIXELS;
+    }
+
+    std::vector<::Pixel> bgra_output(num_pixels);
+    // Data is uint8_t* source. internal_swizzle_rgba_to_bgra_simd expects this.
+    // It will read num_pixels * 4 bytes from Data.
+    // This is safe as num_pixels was derived from Size / 4.
+    // If num_pixels was capped, it processes a sub-segment.
+    internal_swizzle_rgba_to_bgra_simd(Data, bgra_output.data(), num_pixels);
+    
+    return 0;
+}
diff --git a/tests/test_swizzle.cpp b/tests/test_swizzle.cpp
new file mode 100644
index 0000000..75f47de
--- /dev/null
+++ b/tests/test_swizzle.cpp
@@ -0,0 +1,149 @@
+#include <vector>
+#include <cstdint> // For uint8_t
+#include <string>  // For std::to_string
+#include "gtest/gtest.h"
+// For Pixel struct and internal_convert_bgr_to_bgra_simd declaration
+#include "../src/bitmap/bitmap.h" 
+#include "../src/format/format_internal_helpers.hpp" // For BGRA <-> RGBA swizzle helpers
+// #include "../src/simd_utils.hpp" // Not strictly needed here as func is in .cpp
+
+// --- Tests for BGR -> BGRA swizzling (from internal_convert_bgr_to_bgra_simd) ---
+
+// Test case for a single pixel conversion BGR -> BGRA
+TEST(SwizzleBGRtoBGRATest, SinglePixel) {
+    uint8_t bgr_input[] = {0x01, 0x02, 0x03}; // B, G, R
+    ::Pixel output_pixel_array[1];
+    internal_convert_bgr_to_bgra_simd(bgr_input, output_pixel_array, 1);
+    EXPECT_EQ(output_pixel_array[0].blue, 0x01);
+    EXPECT_EQ(output_pixel_array[0].green, 0x02);
+    EXPECT_EQ(output_pixel_array[0].red, 0x03);
+    EXPECT_EQ(output_pixel_array[0].alpha, 255);
+}
+
+// Helper function for various sizes test for BGR -> BGRA
+void check_bgr_to_bgra_conversion(size_t num_pixels) {
+    SCOPED_TRACE("num_pixels (BGRtoBGRA): " + std::to_string(num_pixels)); 
+    if (num_pixels == 0) {
+        internal_convert_bgr_to_bgra_simd(nullptr, nullptr, 0);
+        SUCCEED(); 
+        return;
+    }
+    std::vector<uint8_t> bgr_input(num_pixels * 3);
+    std::vector<::Pixel> output_pixels(num_pixels);
+    for (size_t i = 0; i < num_pixels; ++i) {
+        bgr_input[i*3+0] = static_cast<uint8_t>(((i*3+1)%250) + 1); // Blue
+        bgr_input[i*3+1] = static_cast<uint8_t>(((i*3+2)%250) + 1); // Green
+        bgr_input[i*3+2] = static_cast<uint8_t>(((i*3+3)%250) + 1); // Red
+    }
+    internal_convert_bgr_to_bgra_simd(bgr_input.data(), output_pixels.data(), num_pixels);
+    for (size_t j = 0; j < num_pixels; ++j) {
+        ASSERT_EQ(output_pixels[j].blue, bgr_input[j*3+0]);
+        ASSERT_EQ(output_pixels[j].green, bgr_input[j*3+1]);
+        ASSERT_EQ(output_pixels[j].red, bgr_input[j*3+2]);
+        ASSERT_EQ(output_pixels[j].alpha, 255);
+    }
+}
+
+TEST(SwizzleBGRtoBGRATest, VariousSizes) {
+    std::vector<size_t> sizes_to_test = {0, 1, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 100};
+    for (size_t size : sizes_to_test) {
+        check_bgr_to_bgra_conversion(size);
+    }
+}
+
+TEST(SwizzleBGRtoBGRATest, AllZeros) {
+    const size_t num_pixels = 32;
+    std::vector<uint8_t> bgr_input(num_pixels * 3, 0);
+    std::vector<::Pixel> output_pixels(num_pixels);
+    internal_convert_bgr_to_bgra_simd(bgr_input.data(), output_pixels.data(), num_pixels);
+    for (size_t j = 0; j < num_pixels; ++j) {
+        EXPECT_EQ(output_pixels[j].blue, 0);
+        EXPECT_EQ(output_pixels[j].green, 0);
+        EXPECT_EQ(output_pixels[j].red, 0);
+        EXPECT_EQ(output_pixels[j].alpha, 255);
+    }
+}
+
+TEST(SwizzleBGRtoBGRATest, AllMaxRGB) {
+    const size_t num_pixels = 32;
+    const uint8_t max_val = 254; 
+    std::vector<uint8_t> bgr_input(num_pixels * 3, max_val);
+    std::vector<::Pixel> output_pixels(num_pixels);
+    internal_convert_bgr_to_bgra_simd(bgr_input.data(), output_pixels.data(), num_pixels);
+    for (size_t j = 0; j < num_pixels; ++j) {
+        EXPECT_EQ(output_pixels[j].blue, max_val);
+        EXPECT_EQ(output_pixels[j].green, max_val);
+        EXPECT_EQ(output_pixels[j].red, max_val);
+        EXPECT_EQ(output_pixels[j].alpha, 255);
+    }
+}
+
+// --- Tests for BGRA <-> RGBA swizzling (from format_internal_helpers.hpp) ---
+
+// Helper function for BGRA -> RGBA tests
+void check_bgra_to_rgba_conversion(size_t num_pixels) {
+    SCOPED_TRACE("num_pixels (BGRAtoRGBA): " + std::to_string(num_pixels));
+    if (num_pixels == 0) {
+        internal_swizzle_bgra_to_rgba_simd(nullptr, nullptr, 0);
+        SUCCEED();
+        return;
+    }
+    std::vector<::Pixel> bgra_input(num_pixels);
+    for (size_t j = 0; j < num_pixels; ++j) {
+        bgra_input[j].blue  = static_cast<uint8_t>(((j * 4 + 0) % 250) + 1);
+        bgra_input[j].green = static_cast<uint8_t>(((j * 4 + 1) % 250) + 1);
+        bgra_input[j].red   = static_cast<uint8_t>(((j * 4 + 2) % 250) + 1);
+        bgra_input[j].alpha = static_cast<uint8_t>(((j * 4 + 3) % 250) + 1);
+    }
+    std::vector<uint8_t> rgba_output(num_pixels * 4);
+
+    internal_swizzle_bgra_to_rgba_simd(bgra_input.data(), rgba_output.data(), num_pixels);
+    
+    for (size_t j = 0; j < num_pixels; ++j) {
+        ASSERT_EQ(rgba_output[j*4+0], bgra_input[j].red);   // R
+        ASSERT_EQ(rgba_output[j*4+1], bgra_input[j].green); // G
+        ASSERT_EQ(rgba_output[j*4+2], bgra_input[j].blue);  // B
+        ASSERT_EQ(rgba_output[j*4+3], bgra_input[j].alpha); // A
+    }
+}
+
+TEST(SwizzleBGRARGATest, BGRAtoRGBAVariousSizes) {
+    std::vector<size_t> sizes_to_test = {0, 1, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 100};
+    for (size_t size : sizes_to_test) {
+        check_bgra_to_rgba_conversion(size);
+    }
+}
+
+// Helper function for RGBA -> BGRA tests
+void check_rgba_to_bgra_conversion(size_t num_pixels) {
+    SCOPED_TRACE("num_pixels (RGBAtoBGRA): " + std::to_string(num_pixels));
+    if (num_pixels == 0) {
+        internal_swizzle_rgba_to_bgra_simd(nullptr, nullptr, 0);
+        SUCCEED();
+        return;
+    }
+    std::vector<uint8_t> rgba_input(num_pixels * 4);
+    for (size_t j = 0; j < num_pixels; ++j) {
+        rgba_input[j*4+0] = static_cast<uint8_t>(((j * 4 + 0) % 250) + 1); // R
+        rgba_input[j*4+1] = static_cast<uint8_t>(((j * 4 + 1) % 250) + 1); // G
+        rgba_input[j*4+2] = static_cast<uint8_t>(((j * 4 + 2) % 250) + 1); // B
+        rgba_input[j*4+3] = static_cast<uint8_t>(((j * 4 + 3) % 250) + 1); // A
+    }
+    std::vector<::Pixel> bgra_output(num_pixels);
+
+    internal_swizzle_rgba_to_bgra_simd(rgba_input.data(), bgra_output.data(), num_pixels);
+
+    for (size_t j = 0; j < num_pixels; ++j) {
+        ASSERT_EQ(bgra_output[j].red,   rgba_input[j*4+0]); // R
+        ASSERT_EQ(bgra_output[j].green, rgba_input[j*4+1]); // G
+        ASSERT_EQ(bgra_output[j].blue,  rgba_input[j*4+2]); // B
+        ASSERT_EQ(bgra_output[j].alpha, rgba_input[j*4+3]); // A
+    }
+}
+
+TEST(SwizzleBGRARGATest, RGBAtoBGRAVariousSizes) {
+    std::vector<size_t> sizes_to_test = {0, 1, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 100};
+    for (size_t size : sizes_to_test) {
+        check_rgba_to_bgra_conversion(size);
+    }
+}

From 7145dbd0eff1497c7c64ac971c8a5a567c49efa0 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Fri, 30 May 2025 13:34:33 +0000
Subject: [PATCH 2/5] fix: Add missing declaration for BGR->BGRA helper

The declaration for `internal_convert_bgr_to_bgra_simd` was missing
from `src/bitmap/bitmap.h`, causing build failures when compiling
the unit tests in `tests/test_swizzle.cpp`.

This commit adds the required declaration.

This also includes the previous work:
feat: Add SIMD pixel swizzling with tests and fuzzers

This commit introduces SIMD (AVX2, SSSE3, NEON) optimizations for pixel format conversion and swizzling operations, along with corresponding unit tests and fuzz targets.

Optimizations Implemented:
- BGR (24-bit) to BGRA (32-bit, Alpha=255) conversion in `CreateMatrixFromBitmap`.
- BGRA to RGBA swizzle (e.g., for `BmpTool::load`).
- RGBA to BGRA swizzle (e.g., for `BmpTool::save`).
- Corrected Alpha handling for 24-bit BGR sources to set Alpha to 255 (opaque).

Refactoring:
- The core SIMD and scalar logic for these operations has been refactored into helper functions:
  - `internal_convert_bgr_to_bgra_simd`
  - `internal_swizzle_bgra_to_rgba_simd`
  - `internal_swizzle_rgba_to_bgra_simd`
- These helpers are declared in `src/bitmap/bitmap.h` and `src/format/format_internal_helpers.hpp` for testability.

Testing:
- Added new Google Tests in `tests/test_swizzle.cpp` for all three helper functions, covering various input sizes, edge cases (0 pixels), and content verification.
- Added new fuzz targets in `tests/fuzz/` for each helper function to detect crashes and sanitizer issues with random inputs:
  - `fuzz_convert_bgr_to_bgra.cpp`
  - `fuzz_swizzle_bgra_to_rgba.cpp`
  - `fuzz_swizzle_rgba_to_bgra.cpp`
- Updated `tests/CMakeLists.txt` to integrate the new unit tests and fuzz targets into the build system.
---
 src/bitmap/bitmap.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/bitmap/bitmap.h b/src/bitmap/bitmap.h
index 619beee..4f0edd7 100644
--- a/src/bitmap/bitmap.h
+++ b/src/bitmap/bitmap.h
@@ -417,4 +417,7 @@ Pixel ApplySepiaToPixel(Pixel pixel);
 
 // Note: Box blur is applied over a region within ApplyBoxBlur,
 // so it does not have a direct single-pixel helper function here.
+
+// Helper function for BGR to BGRA conversion (primarily for testing)
+void internal_convert_bgr_to_bgra_simd(const uint8_t* src_bgr_data, ::Pixel* dest_bgra_pixels, size_t num_pixels);
 #endif
\ No newline at end of file

From 7a139045118e109fc1fc5b4e4b8f5ef340004787 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Fri, 30 May 2025 13:39:38 +0000
Subject: [PATCH 3/5] fix: Resolve linker errors for swizzle helpers

Corrects "undefined reference" linker errors for SIMD helper functions:
- Added missing declaration for `internal_convert_bgr_to_bgra_simd` to `src/bitmap/bitmap.h`.
- Placed declarations for `internal_swizzle_bgra_to_rgba_simd` and `internal_swizzle_rgba_to_bgra_simd` into the `BmpTool` namespace in `src/format/format_internal_helpers.hpp` to match their definitions.
- Updated calls in `tests/test_swizzle.cpp` to use `BmpTool::` namespace qualifier for these functions.

This also includes the previous work:
feat: Add SIMD pixel swizzling with tests and fuzzers

This commit introduces SIMD (AVX2, SSSE3, NEON) optimizations for pixel format conversion and swizzling operations, along with corresponding unit tests and fuzz targets.

Optimizations Implemented:
- BGR (24-bit) to BGRA (32-bit, Alpha=255) conversion in `CreateMatrixFromBitmap`.
- BGRA to RGBA swizzle (e.g., for `BmpTool::load`).
- RGBA to BGRA swizzle (e.g., for `BmpTool::save`).
- Corrected Alpha handling for 24-bit BGR sources to set Alpha to 255 (opaque).

Refactoring:
- The core SIMD and scalar logic for these operations has been refactored into helper functions:
  - `internal_convert_bgr_to_bgra_simd`
  - `internal_swizzle_bgra_to_rgba_simd`
  - `internal_swizzle_rgba_to_bgra_simd`

Testing:
- Added new Google Tests in `tests/test_swizzle.cpp` for all three helper functions.
- Added new fuzz targets in `tests/fuzz/` for each helper function.
- Updated `tests/CMakeLists.txt` to integrate new tests and fuzzers.
---
 src/bitmap/bitmap.cpp                    | 18 +++++++++---------
 src/format/bitmap.cpp                    |  2 +-
 src/format/format_internal_helpers.hpp   |  9 +++++----
 tests/fuzz/fuzz_convert_bgr_to_bgra.cpp  |  2 +-
 tests/fuzz/fuzz_swizzle_bgra_to_rgba.cpp |  6 +++---
 tests/fuzz/fuzz_swizzle_rgba_to_bgra.cpp |  2 +-
 tests/test_swizzle.cpp                   | 18 +++++++++---------
 7 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/src/bitmap/bitmap.cpp b/src/bitmap/bitmap.cpp
index f248681..7985cdd 100644
--- a/src/bitmap/bitmap.cpp
+++ b/src/bitmap/bitmap.cpp
@@ -197,12 +197,12 @@ void internal_convert_bgr_to_bgra_simd(const uint8_t* src_row_bgr_ptr, ::Pixel*
 #endif
 
 #if defined(__AVX2__) || defined(__SSSE3__) // Use SSSE3 for AVX2 as well if no specific AVX2 code
-    const size_t pixels_per_step = 4; 
+    const size_t pixels_per_step = 4;
     __m128i bgr_to_bgrX_mask = _mm_setr_epi8(
-        0, 1, 2, (char)0x80, 
-        3, 4, 5, (char)0x80, 
-        6, 7, 8, (char)0x80, 
-        9, 10, 11, (char)0x80 
+        0, 1, 2, (char)0x80,
+        3, 4, 5, (char)0x80,
+        6, 7, 8, (char)0x80,
+        9, 10, 11, (char)0x80
     );
     __m128i alpha_channel_ff = _mm_setr_epi8(
         0,0,0, (char)0xFF, 0,0,0,(char)0xFF, 0,0,0,(char)0xFF, 0,0,0,(char)0xFF
@@ -213,13 +213,13 @@ void internal_convert_bgr_to_bgra_simd(const uint8_t* src_row_bgr_ptr, ::Pixel*
         __m128i bgra_pixels_expanded = _mm_shuffle_epi8(bgr_data, bgr_to_bgrX_mask);
         __m128i bgra_pixels_final = _mm_or_si128(bgra_pixels_expanded, alpha_channel_ff);
         _mm_storeu_si128(reinterpret_cast<__m128i*>(dest_row_pixel_ptr + current_dest_pixel_idx), bgra_pixels_final);
-        current_src_byte_offset += pixels_per_step * 3; 
-        current_dest_pixel_idx += pixels_per_step;    
+        current_src_byte_offset += pixels_per_step * 3;
+        current_dest_pixel_idx += pixels_per_step;
         num_pixels_to_process -= pixels_per_step;
     }
 #elif defined(__ARM_NEON) || defined(__ARM_NEON__)
     const size_t pixels_per_step = 4;
-    const uint8_t table_bgr_to_bgr0[] = {0,1,2,16, 3,4,5,16, 6,7,8,16, 9,10,11,16}; 
+    const uint8_t table_bgr_to_bgr0[] = {0,1,2,16, 3,4,5,16, 6,7,8,16, 9,10,11,16};
     uint8x16_t neon_shuffle_table = vld1q_u8(table_bgr_to_bgr0);
     const uint8_t alpha_bytes[] = {0,0,0,0xFF, 0,0,0,0xFF, 0,0,0,0xFF, 0,0,0,0xFF};
     uint8x16_t alpha_channel_ff_neon = vld1q_u8(alpha_bytes);
@@ -235,7 +235,7 @@ void internal_convert_bgr_to_bgra_simd(const uint8_t* src_row_bgr_ptr, ::Pixel*
         current_dest_pixel_idx += pixels_per_step;
         num_pixels_to_process -= pixels_per_step;
     }
-#else 
+#else
     // This #else block ensures that if no SIMD path is taken (e.g. SSSE3/NEON not defined, or AVX2 defined but its specific block is empty and it's not grouped with SSSE3),
     // the scalar loop below is the ONLY path for processing.
     // The current structure with #if defined(__AVX2__) || defined(__SSSE3__) followed by #elif defined(__ARM_NEON)
diff --git a/src/format/bitmap.cpp b/src/format/bitmap.cpp
index 1c0eb06..44ddc7a 100644
--- a/src/format/bitmap.cpp
+++ b/src/format/bitmap.cpp
@@ -352,7 +352,7 @@ void internal_swizzle_rgba_to_bgra_simd(const uint8_t* src_rgba_data, ::Pixel* d
     for (size_t i = 0; i < num_pixels_to_process; ++i) {
         const uint8_t* src_pixel_ptr = src_rgba_data + (current_pixel_idx + i) * 4;
         ::Pixel& dest_pixel = *(dest_bgra_pixels + current_pixel_idx + i);
-        
+
         dest_pixel.red   = src_pixel_ptr[0]; // R
         dest_pixel.green = src_pixel_ptr[1]; // G
         dest_pixel.blue  = src_pixel_ptr[2]; // B
diff --git a/src/format/format_internal_helpers.hpp b/src/format/format_internal_helpers.hpp
index 450da31..3658a7f 100644
--- a/src/format/format_internal_helpers.hpp
+++ b/src/format/format_internal_helpers.hpp
@@ -2,10 +2,11 @@
 
 #include <cstddef> // For size_t
 // Include bitmap.h for the definition of ::Pixel
-#include "../bitmap/bitmap.h" 
+#include "../bitmap/bitmap.h"
 
-// Helper function to convert an array of ::Pixel (BGRA order) to an array of uint8_t (RGBA order)
-void internal_swizzle_bgra_to_rgba_simd(const ::Pixel* src_bgra_pixels, uint8_t* dest_rgba_data, size_t num_pixels);
+namespace BmpTool {
 
-// Helper function to convert an array of uint8_t (RGBA order) to an array of ::Pixel (BGRA order)
+void internal_swizzle_bgra_to_rgba_simd(const ::Pixel* src_bgra_pixels, uint8_t* dest_rgba_data, size_t num_pixels);
 void internal_swizzle_rgba_to_bgra_simd(const uint8_t* src_rgba_data, ::Pixel* dest_bgra_pixels, size_t num_pixels);
+
+} // namespace BmpTool
diff --git a/tests/fuzz/fuzz_convert_bgr_to_bgra.cpp b/tests/fuzz/fuzz_convert_bgr_to_bgra.cpp
index afe57a3..470699b 100644
--- a/tests/fuzz/fuzz_convert_bgr_to_bgra.cpp
+++ b/tests/fuzz/fuzz_convert_bgr_to_bgra.cpp
@@ -29,6 +29,6 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
     // This is safe because num_pixels was derived from Size/3.
     // If num_pixels was capped by MAX_FUZZ_PIXELS, it reads a sub-segment of the original data.
     internal_convert_bgr_to_bgra_simd(Data, output_pixels.data(), num_pixels);
-    
+
     return 0; // Non-zero return values are reserved for future use.
 }
diff --git a/tests/fuzz/fuzz_swizzle_bgra_to_rgba.cpp b/tests/fuzz/fuzz_swizzle_bgra_to_rgba.cpp
index d14007b..6cb605f 100644
--- a/tests/fuzz/fuzz_swizzle_bgra_to_rgba.cpp
+++ b/tests/fuzz/fuzz_swizzle_bgra_to_rgba.cpp
@@ -16,7 +16,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
     if (Size % sizeof(::Pixel) != 0) {
         // Or alternatively, could calculate num_pixels based on floor(Size / sizeof(::Pixel))
         // and only pass that many. For stricter fuzzing, returning if not aligned might be intended.
-        return 0; 
+        return 0;
     }
 
     size_t num_pixels = Size / sizeof(::Pixel);
@@ -26,12 +26,12 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
     if (num_pixels > MAX_FUZZ_PIXELS) {
         num_pixels = MAX_FUZZ_PIXELS;
     }
-    
+
     std::vector<uint8_t> rgba_output(num_pixels * 4);
     // Data is cast to const ::Pixel*. The function will read num_pixels from this array.
     // This is safe because num_pixels was derived from Size / sizeof(::Pixel).
     // If num_pixels was capped, it processes a sub-segment.
     internal_swizzle_bgra_to_rgba_simd(reinterpret_cast<const ::Pixel*>(Data), rgba_output.data(), num_pixels);
-    
+
     return 0;
 }
diff --git a/tests/fuzz/fuzz_swizzle_rgba_to_bgra.cpp b/tests/fuzz/fuzz_swizzle_rgba_to_bgra.cpp
index 5e0b9a5..7c48d16 100644
--- a/tests/fuzz/fuzz_swizzle_rgba_to_bgra.cpp
+++ b/tests/fuzz/fuzz_swizzle_rgba_to_bgra.cpp
@@ -31,6 +31,6 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
     // This is safe as num_pixels was derived from Size / 4.
     // If num_pixels was capped, it processes a sub-segment.
     internal_swizzle_rgba_to_bgra_simd(Data, bgra_output.data(), num_pixels);
-    
+
     return 0;
 }
diff --git a/tests/test_swizzle.cpp b/tests/test_swizzle.cpp
index 75f47de..e26c017 100644
--- a/tests/test_swizzle.cpp
+++ b/tests/test_swizzle.cpp
@@ -3,7 +3,7 @@
 #include <string>  // For std::to_string
 #include "gtest/gtest.h"
 // For Pixel struct and internal_convert_bgr_to_bgra_simd declaration
-#include "../src/bitmap/bitmap.h" 
+#include "../src/bitmap/bitmap.h"
 #include "../src/format/format_internal_helpers.hpp" // For BGRA <-> RGBA swizzle helpers
 // #include "../src/simd_utils.hpp" // Not strictly needed here as func is in .cpp
 
@@ -22,10 +22,10 @@ TEST(SwizzleBGRtoBGRATest, SinglePixel) {
 
 // Helper function for various sizes test for BGR -> BGRA
 void check_bgr_to_bgra_conversion(size_t num_pixels) {
-    SCOPED_TRACE("num_pixels (BGRtoBGRA): " + std::to_string(num_pixels)); 
+    SCOPED_TRACE("num_pixels (BGRtoBGRA): " + std::to_string(num_pixels));
     if (num_pixels == 0) {
         internal_convert_bgr_to_bgra_simd(nullptr, nullptr, 0);
-        SUCCEED(); 
+        SUCCEED();
         return;
     }
     std::vector<uint8_t> bgr_input(num_pixels * 3);
@@ -66,7 +66,7 @@ TEST(SwizzleBGRtoBGRATest, AllZeros) {
 
 TEST(SwizzleBGRtoBGRATest, AllMaxRGB) {
     const size_t num_pixels = 32;
-    const uint8_t max_val = 254; 
+    const uint8_t max_val = 254;
     std::vector<uint8_t> bgr_input(num_pixels * 3, max_val);
     std::vector<::Pixel> output_pixels(num_pixels);
     internal_convert_bgr_to_bgra_simd(bgr_input.data(), output_pixels.data(), num_pixels);
@@ -84,7 +84,7 @@ TEST(SwizzleBGRtoBGRATest, AllMaxRGB) {
 void check_bgra_to_rgba_conversion(size_t num_pixels) {
     SCOPED_TRACE("num_pixels (BGRAtoRGBA): " + std::to_string(num_pixels));
     if (num_pixels == 0) {
-        internal_swizzle_bgra_to_rgba_simd(nullptr, nullptr, 0);
+        BmpTool::internal_swizzle_bgra_to_rgba_simd(nullptr, nullptr, 0);
         SUCCEED();
         return;
     }
@@ -97,8 +97,8 @@ void check_bgra_to_rgba_conversion(size_t num_pixels) {
     }
     std::vector<uint8_t> rgba_output(num_pixels * 4);
 
-    internal_swizzle_bgra_to_rgba_simd(bgra_input.data(), rgba_output.data(), num_pixels);
-    
+    BmpTool::internal_swizzle_bgra_to_rgba_simd(bgra_input.data(), rgba_output.data(), num_pixels);
+
     for (size_t j = 0; j < num_pixels; ++j) {
         ASSERT_EQ(rgba_output[j*4+0], bgra_input[j].red);   // R
         ASSERT_EQ(rgba_output[j*4+1], bgra_input[j].green); // G
@@ -118,7 +118,7 @@ TEST(SwizzleBGRARGATest, BGRAtoRGBAVariousSizes) {
 void check_rgba_to_bgra_conversion(size_t num_pixels) {
     SCOPED_TRACE("num_pixels (RGBAtoBGRA): " + std::to_string(num_pixels));
     if (num_pixels == 0) {
-        internal_swizzle_rgba_to_bgra_simd(nullptr, nullptr, 0);
+        BmpTool::internal_swizzle_rgba_to_bgra_simd(nullptr, nullptr, 0);
         SUCCEED();
         return;
     }
@@ -131,7 +131,7 @@ void check_rgba_to_bgra_conversion(size_t num_pixels) {
     }
     std::vector<::Pixel> bgra_output(num_pixels);
 
-    internal_swizzle_rgba_to_bgra_simd(rgba_input.data(), bgra_output.data(), num_pixels);
+    BmpTool::internal_swizzle_rgba_to_bgra_simd(rgba_input.data(), bgra_output.data(), num_pixels);
 
     for (size_t j = 0; j < num_pixels; ++j) {
         ASSERT_EQ(bgra_output[j].red,   rgba_input[j*4+0]); // R

From e2d3a43a5dbf8943a8aadaec1df3ccf0c3e79ab4 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Fri, 30 May 2025 13:43:54 +0000
Subject: [PATCH 4/5] fix: Qualify swizzle helper calls in fuzz targets

Corrects build errors in fuzz targets by adding the `BmpTool::`
namespace qualifier to calls of:
- `internal_swizzle_bgra_to_rgba_simd` in `fuzz_swizzle_bgra_to_rgba.cpp`
- `internal_swizzle_rgba_to_bgra_simd` in `fuzz_swizzle_rgba_to_bgra.cpp`

This aligns the calls with the function declarations which are
namespaced in `format_internal_helpers.hpp`.

This also includes the previous work:
fix: Resolve linker errors for swizzle helpers
feat: Add SIMD pixel swizzling with tests and fuzzers
---
 src/bitmap/bitmap.cpp                    | 18 +++++++++---------
 src/format/bitmap.cpp                    |  2 +-
 src/format/format_internal_helpers.hpp   |  2 +-
 tests/fuzz/fuzz_convert_bgr_to_bgra.cpp  |  2 +-
 tests/fuzz/fuzz_swizzle_bgra_to_rgba.cpp |  8 ++++----
 tests/fuzz/fuzz_swizzle_rgba_to_bgra.cpp |  4 ++--
 tests/test_swizzle.cpp                   | 10 +++++-----
 7 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/src/bitmap/bitmap.cpp b/src/bitmap/bitmap.cpp
index 7985cdd..f248681 100644
--- a/src/bitmap/bitmap.cpp
+++ b/src/bitmap/bitmap.cpp
@@ -197,12 +197,12 @@ void internal_convert_bgr_to_bgra_simd(const uint8_t* src_row_bgr_ptr, ::Pixel*
 #endif
 
 #if defined(__AVX2__) || defined(__SSSE3__) // Use SSSE3 for AVX2 as well if no specific AVX2 code
-    const size_t pixels_per_step = 4;
+    const size_t pixels_per_step = 4; 
     __m128i bgr_to_bgrX_mask = _mm_setr_epi8(
-        0, 1, 2, (char)0x80,
-        3, 4, 5, (char)0x80,
-        6, 7, 8, (char)0x80,
-        9, 10, 11, (char)0x80
+        0, 1, 2, (char)0x80, 
+        3, 4, 5, (char)0x80, 
+        6, 7, 8, (char)0x80, 
+        9, 10, 11, (char)0x80 
     );
     __m128i alpha_channel_ff = _mm_setr_epi8(
         0,0,0, (char)0xFF, 0,0,0,(char)0xFF, 0,0,0,(char)0xFF, 0,0,0,(char)0xFF
@@ -213,13 +213,13 @@ void internal_convert_bgr_to_bgra_simd(const uint8_t* src_row_bgr_ptr, ::Pixel*
         __m128i bgra_pixels_expanded = _mm_shuffle_epi8(bgr_data, bgr_to_bgrX_mask);
         __m128i bgra_pixels_final = _mm_or_si128(bgra_pixels_expanded, alpha_channel_ff);
         _mm_storeu_si128(reinterpret_cast<__m128i*>(dest_row_pixel_ptr + current_dest_pixel_idx), bgra_pixels_final);
-        current_src_byte_offset += pixels_per_step * 3;
-        current_dest_pixel_idx += pixels_per_step;
+        current_src_byte_offset += pixels_per_step * 3; 
+        current_dest_pixel_idx += pixels_per_step;    
         num_pixels_to_process -= pixels_per_step;
     }
 #elif defined(__ARM_NEON) || defined(__ARM_NEON__)
     const size_t pixels_per_step = 4;
-    const uint8_t table_bgr_to_bgr0[] = {0,1,2,16, 3,4,5,16, 6,7,8,16, 9,10,11,16};
+    const uint8_t table_bgr_to_bgr0[] = {0,1,2,16, 3,4,5,16, 6,7,8,16, 9,10,11,16}; 
     uint8x16_t neon_shuffle_table = vld1q_u8(table_bgr_to_bgr0);
     const uint8_t alpha_bytes[] = {0,0,0,0xFF, 0,0,0,0xFF, 0,0,0,0xFF, 0,0,0,0xFF};
     uint8x16_t alpha_channel_ff_neon = vld1q_u8(alpha_bytes);
@@ -235,7 +235,7 @@ void internal_convert_bgr_to_bgra_simd(const uint8_t* src_row_bgr_ptr, ::Pixel*
         current_dest_pixel_idx += pixels_per_step;
         num_pixels_to_process -= pixels_per_step;
     }
-#else
+#else 
     // This #else block ensures that if no SIMD path is taken (e.g. SSSE3/NEON not defined, or AVX2 defined but its specific block is empty and it's not grouped with SSSE3),
     // the scalar loop below is the ONLY path for processing.
     // The current structure with #if defined(__AVX2__) || defined(__SSSE3__) followed by #elif defined(__ARM_NEON)
diff --git a/src/format/bitmap.cpp b/src/format/bitmap.cpp
index 44ddc7a..1c0eb06 100644
--- a/src/format/bitmap.cpp
+++ b/src/format/bitmap.cpp
@@ -352,7 +352,7 @@ void internal_swizzle_rgba_to_bgra_simd(const uint8_t* src_rgba_data, ::Pixel* d
     for (size_t i = 0; i < num_pixels_to_process; ++i) {
         const uint8_t* src_pixel_ptr = src_rgba_data + (current_pixel_idx + i) * 4;
         ::Pixel& dest_pixel = *(dest_bgra_pixels + current_pixel_idx + i);
-
+        
         dest_pixel.red   = src_pixel_ptr[0]; // R
         dest_pixel.green = src_pixel_ptr[1]; // G
         dest_pixel.blue  = src_pixel_ptr[2]; // B
diff --git a/src/format/format_internal_helpers.hpp b/src/format/format_internal_helpers.hpp
index 3658a7f..fa39270 100644
--- a/src/format/format_internal_helpers.hpp
+++ b/src/format/format_internal_helpers.hpp
@@ -2,7 +2,7 @@
 
 #include <cstddef> // For size_t
 // Include bitmap.h for the definition of ::Pixel
-#include "../bitmap/bitmap.h"
+#include "../bitmap/bitmap.h" 
 
 namespace BmpTool {
 
diff --git a/tests/fuzz/fuzz_convert_bgr_to_bgra.cpp b/tests/fuzz/fuzz_convert_bgr_to_bgra.cpp
index 470699b..afe57a3 100644
--- a/tests/fuzz/fuzz_convert_bgr_to_bgra.cpp
+++ b/tests/fuzz/fuzz_convert_bgr_to_bgra.cpp
@@ -29,6 +29,6 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
     // This is safe because num_pixels was derived from Size/3.
     // If num_pixels was capped by MAX_FUZZ_PIXELS, it reads a sub-segment of the original data.
     internal_convert_bgr_to_bgra_simd(Data, output_pixels.data(), num_pixels);
-
+    
     return 0; // Non-zero return values are reserved for future use.
 }
diff --git a/tests/fuzz/fuzz_swizzle_bgra_to_rgba.cpp b/tests/fuzz/fuzz_swizzle_bgra_to_rgba.cpp
index 6cb605f..682d220 100644
--- a/tests/fuzz/fuzz_swizzle_bgra_to_rgba.cpp
+++ b/tests/fuzz/fuzz_swizzle_bgra_to_rgba.cpp
@@ -16,7 +16,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
     if (Size % sizeof(::Pixel) != 0) {
         // Or alternatively, could calculate num_pixels based on floor(Size / sizeof(::Pixel))
         // and only pass that many. For stricter fuzzing, returning if not aligned might be intended.
-        return 0;
+        return 0; 
     }
 
     size_t num_pixels = Size / sizeof(::Pixel);
@@ -26,12 +26,12 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
     if (num_pixels > MAX_FUZZ_PIXELS) {
         num_pixels = MAX_FUZZ_PIXELS;
     }
-
+    
     std::vector<uint8_t> rgba_output(num_pixels * 4);
     // Data is cast to const ::Pixel*. The function will read num_pixels from this array.
     // This is safe because num_pixels was derived from Size / sizeof(::Pixel).
     // If num_pixels was capped, it processes a sub-segment.
-    internal_swizzle_bgra_to_rgba_simd(reinterpret_cast<const ::Pixel*>(Data), rgba_output.data(), num_pixels);
-
+    BmpTool::internal_swizzle_bgra_to_rgba_simd(reinterpret_cast<const ::Pixel*>(Data), rgba_output.data(), num_pixels);
+    
     return 0;
 }
diff --git a/tests/fuzz/fuzz_swizzle_rgba_to_bgra.cpp b/tests/fuzz/fuzz_swizzle_rgba_to_bgra.cpp
index 7c48d16..aef6224 100644
--- a/tests/fuzz/fuzz_swizzle_rgba_to_bgra.cpp
+++ b/tests/fuzz/fuzz_swizzle_rgba_to_bgra.cpp
@@ -30,7 +30,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
     // It will read num_pixels * 4 bytes from Data.
     // This is safe as num_pixels was derived from Size / 4.
     // If num_pixels was capped, it processes a sub-segment.
-    internal_swizzle_rgba_to_bgra_simd(Data, bgra_output.data(), num_pixels);
-
+    BmpTool::internal_swizzle_rgba_to_bgra_simd(Data, bgra_output.data(), num_pixels);
+    
     return 0;
 }
diff --git a/tests/test_swizzle.cpp b/tests/test_swizzle.cpp
index e26c017..a32df2b 100644
--- a/tests/test_swizzle.cpp
+++ b/tests/test_swizzle.cpp
@@ -3,7 +3,7 @@
 #include <string>  // For std::to_string
 #include "gtest/gtest.h"
 // For Pixel struct and internal_convert_bgr_to_bgra_simd declaration
-#include "../src/bitmap/bitmap.h"
+#include "../src/bitmap/bitmap.h" 
 #include "../src/format/format_internal_helpers.hpp" // For BGRA <-> RGBA swizzle helpers
 // #include "../src/simd_utils.hpp" // Not strictly needed here as func is in .cpp
 
@@ -22,10 +22,10 @@ TEST(SwizzleBGRtoBGRATest, SinglePixel) {
 
 // Helper function for various sizes test for BGR -> BGRA
 void check_bgr_to_bgra_conversion(size_t num_pixels) {
-    SCOPED_TRACE("num_pixels (BGRtoBGRA): " + std::to_string(num_pixels));
+    SCOPED_TRACE("num_pixels (BGRtoBGRA): " + std::to_string(num_pixels)); 
     if (num_pixels == 0) {
         internal_convert_bgr_to_bgra_simd(nullptr, nullptr, 0);
-        SUCCEED();
+        SUCCEED(); 
         return;
     }
     std::vector<uint8_t> bgr_input(num_pixels * 3);
@@ -66,7 +66,7 @@ TEST(SwizzleBGRtoBGRATest, AllZeros) {
 
 TEST(SwizzleBGRtoBGRATest, AllMaxRGB) {
     const size_t num_pixels = 32;
-    const uint8_t max_val = 254;
+    const uint8_t max_val = 254; 
     std::vector<uint8_t> bgr_input(num_pixels * 3, max_val);
     std::vector<::Pixel> output_pixels(num_pixels);
     internal_convert_bgr_to_bgra_simd(bgr_input.data(), output_pixels.data(), num_pixels);
@@ -98,7 +98,7 @@ void check_bgra_to_rgba_conversion(size_t num_pixels) {
     std::vector<uint8_t> rgba_output(num_pixels * 4);
 
     BmpTool::internal_swizzle_bgra_to_rgba_simd(bgra_input.data(), rgba_output.data(), num_pixels);
-
+    
     for (size_t j = 0; j < num_pixels; ++j) {
         ASSERT_EQ(rgba_output[j*4+0], bgra_input[j].red);   // R
         ASSERT_EQ(rgba_output[j*4+1], bgra_input[j].green); // G

From 1b943a053584528a79a9c858639b23dcdff6085f Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Fri, 30 May 2025 13:51:57 +0000
Subject: [PATCH 5/5] ci: Run new swizzle fuzz targets in GitHub Actions

Updates `.github/workflows/fuzzing.yml` to create corpus directories
for and execute the three new fuzz targets:
- fuzz_convert_bgr_to_bgra
- fuzz_swizzle_bgra_to_rgba
- fuzz_swizzle_rgba_to_bgra

This ensures these fuzzers are run as part of the CI pipeline.

This also includes the previous work:
fix: Qualify swizzle helper calls in fuzz targets
fix: Resolve linker errors for swizzle helpers
feat: Add SIMD pixel swizzling with tests and fuzzers
---
 .github/workflows/fuzzing.yml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml
index 799bc3d..0fce51e 100644
--- a/.github/workflows/fuzzing.yml
+++ b/.github/workflows/fuzzing.yml
@@ -36,6 +36,9 @@ jobs:
         mkdir -p build_fuzz/corpus_fuzzing/fuzz_bitmap_file_corpus
         mkdir -p build_fuzz/corpus_fuzzing/fuzz_image_operations_corpus
         mkdir -p build_fuzz/corpus_fuzzing/fuzz_matrix_corpus
+        mkdir -p build_fuzz/corpus_fuzzing/fuzz_convert_bgr_to_bgra_corpus
+        mkdir -p build_fuzz/corpus_fuzzing/fuzz_swizzle_bgra_to_rgba_corpus
+        mkdir -p build_fuzz/corpus_fuzzing/fuzz_swizzle_rgba_to_bgra_corpus
 
     - name: Run fuzz_bitmap
       run: |
@@ -56,3 +59,15 @@ jobs:
     - name: Run fuzz_matrix
       run: |
         ./build_fuzz/tests/fuzz_matrix -max_total_time=60 -print_final_stats=1 -print_pcs=1 -error_exitcode=1 build_fuzz/corpus_fuzzing/fuzz_matrix_corpus/
+    
+    - name: Run fuzz_convert_bgr_to_bgra
+      run: |
+        ./build_fuzz/tests/fuzz_convert_bgr_to_bgra -max_total_time=60 -print_final_stats=1 -print_pcs=1 -error_exitcode=1 build_fuzz/corpus_fuzzing/fuzz_convert_bgr_to_bgra_corpus/
+    
+    - name: Run fuzz_swizzle_bgra_to_rgba
+      run: |
+        ./build_fuzz/tests/fuzz_swizzle_bgra_to_rgba -max_total_time=60 -print_final_stats=1 -print_pcs=1 -error_exitcode=1 build_fuzz/corpus_fuzzing/fuzz_swizzle_bgra_to_rgba_corpus/
+
+    - name: Run fuzz_swizzle_rgba_to_bgra
+      run: |
+        ./build_fuzz/tests/fuzz_swizzle_rgba_to_bgra -max_total_time=60 -print_final_stats=1 -print_pcs=1 -error_exitcode=1 build_fuzz/corpus_fuzzing/fuzz_swizzle_rgba_to_bgra_corpus/