From 945f10473422405fca2883a1e755e0341be7e308 Mon Sep 17 00:00:00 2001 From: Trist Date: Wed, 18 Jan 2023 12:58:59 +1000 Subject: [PATCH 1/5] Add support for bc7. --- DevIL/include/IL/devil_internal_exports.h | 3 + DevIL/include/IL/il.h | 121 ++++ DevIL/src-IL/include/bc7decomp.h | 177 ++++++ DevIL/src-IL/include/il_dds.h | 2 + DevIL/src-IL/src/bc7decomp.cpp | 679 ++++++++++++++++++++++ DevIL/src-IL/src/il_dds.cpp | 98 +++- DevIL/src-ILUT/src/ilut_opengl.cpp | 37 +- 7 files changed, 1107 insertions(+), 10 deletions(-) create mode 100644 DevIL/src-IL/include/bc7decomp.h create mode 100644 DevIL/src-IL/src/bc7decomp.cpp diff --git a/DevIL/include/IL/devil_internal_exports.h b/DevIL/include/IL/devil_internal_exports.h index fd07d062..03d216cc 100644 --- a/DevIL/include/IL/devil_internal_exports.h +++ b/DevIL/include/IL/devil_internal_exports.h @@ -45,6 +45,8 @@ extern "C" { #define IL_MAX(a,b) (((a) > (b)) ? (a) : (b)) #define IL_MIN(a,b) (((a) < (b)) ? (a) : (b)) +#define IL_SWAP(T,a,b) do { T il_swap_tmp_ = (a); (a) = (b); (b) = il_swap_tmp_; } while (0) /* swaps two lvalues of type T; do/while(0) keeps it a single statement inside if/else */ + //! Basic Palette struct typedef struct ILpal @@ -86,6 +88,7 @@ typedef struct ILimage ILuint OffY; //!< y-offset of the image ILubyte* DxtcData; //!< compressed data ILenum DxtcFormat; //!< compressed data format + ILenum DxgiFormat; //!< DXGI format of the compressed data (presumably an IL_DXGI_* value) ILuint DxtcSize; //!< compressed data size } ILimage; diff --git a/DevIL/include/IL/il.h b/DevIL/include/IL/il.h index 6aacb3bb..c3f7e5c8 100644 --- a/DevIL/include/IL/il.h +++ b/DevIL/include/IL/il.h @@ -396,6 +396,127 @@ typedef long long unsigned int ILuint64; #define IL_ATI1N 0x0710 #define IL_DXT1A 0x0711 // Normally the same as IL_DXT1, except for nVidia Texture Tools. 
+// DXGI definitions (values mirror the DXGI_FORMAT enumeration) +#define IL_DXGI_UNKNOWN 0 +#define IL_DXGI_R32G32B32A32_TYPELESS 1 +#define IL_DXGI_R32G32B32A32_FLOAT 2 +#define IL_DXGI_R32G32B32A32_UINT 3 +#define IL_DXGI_R32G32B32A32_SINT 4 +#define IL_DXGI_R32G32B32_TYPELESS 5 +#define IL_DXGI_R32G32B32_FLOAT 6 +#define IL_DXGI_R32G32B32_UINT 7 +#define IL_DXGI_R32G32B32_SINT 8 +#define IL_DXGI_R16G16B16A16_TYPELESS 9 +#define IL_DXGI_R16G16B16A16_FLOAT 10 +#define IL_DXGI_R16G16B16A16_UNORM 11 +#define IL_DXGI_R16G16B16A16_UINT 12 +#define IL_DXGI_R16G16B16A16_SNORM 13 +#define IL_DXGI_R16G16B16A16_SINT 14 +#define IL_DXGI_R32G32_TYPELESS 15 +#define IL_DXGI_R32G32_FLOAT 16 +#define IL_DXGI_R32G32_UINT 17 +#define IL_DXGI_R32G32_SINT 18 +#define IL_DXGI_R32G8X24_TYPELESS 19 +#define IL_DXGI_D32_FLOAT_S8X24_UINT 20 +#define IL_DXGI_R32_FLOAT_X8X24_TYPELESS 21 +#define IL_DXGI_X32_TYPELESS_G8X24_UINT 22 +#define IL_DXGI_R10G10B10A2_TYPELESS 23 +#define IL_DXGI_R10G10B10A2_UNORM 24 +#define IL_DXGI_R10G10B10A2_UINT 25 +#define IL_DXGI_R11G11B10_FLOAT 26 +#define IL_DXGI_R8G8B8A8_TYPELESS 27 +#define IL_DXGI_R8G8B8A8_UNORM 28 +#define IL_DXGI_R8G8B8A8_UNORM_SRGB 29 +#define IL_DXGI_R8G8B8A8_UINT 30 +#define IL_DXGI_R8G8B8A8_SNORM 31 +#define IL_DXGI_R8G8B8A8_SINT 32 +#define IL_DXGI_R16G16_TYPELESS 33 +#define IL_DXGI_R16G16_FLOAT 34 +#define IL_DXGI_R16G16_UNORM 35 +#define IL_DXGI_R16G16_UINT 36 +#define IL_DXGI_R16G16_SNORM 37 +#define IL_DXGI_R16G16_SINT 38 +#define IL_DXGI_R32_TYPELESS 39 +#define IL_DXGI_D32_FLOAT 40 +#define IL_DXGI_R32_FLOAT 41 +#define IL_DXGI_R32_UINT 42 +#define IL_DXGI_R32_SINT 43 +#define IL_DXGI_R24G8_TYPELESS 44 +#define IL_DXGI_D24_UNORM_S8_UINT 45 +#define IL_DXGI_R24_UNORM_X8_TYPELESS 46 +#define IL_DXGI_X24_TYPELESS_G8_UINT 47 +#define IL_DXGI_R8G8_TYPELESS 48 +#define IL_DXGI_R8G8_UNORM 49 +#define IL_DXGI_R8G8_UINT 50 +#define IL_DXGI_R8G8_SNORM 51 +#define IL_DXGI_R8G8_SINT 52 +#define IL_DXGI_R16_TYPELESS 53 +#define IL_DXGI_R16_FLOAT 54 +#define IL_DXGI_D16_UNORM 55 +#define IL_DXGI_R16_UNORM 56 
+#define IL_DXGI_R16_UINT 57 +#define IL_DXGI_R16_SNORM 58 +#define IL_DXGI_R16_SINT 59 +#define IL_DXGI_R8_TYPELESS 60 +#define IL_DXGI_R8_UNORM 61 +#define IL_DXGI_R8_UINT 62 +#define IL_DXGI_R8_SNORM 63 +#define IL_DXGI_R8_SINT 64 +#define IL_DXGI_A8_UNORM 65 +#define IL_DXGI_R1_UNORM 66 +#define IL_DXGI_R9G9B9E5_SHAREDEXP 67 +#define IL_DXGI_R8G8_B8G8_UNORM 68 +#define IL_DXGI_G8R8_G8B8_UNORM 69 +#define IL_DXGI_BC1_TYPELESS 70 +#define IL_DXGI_BC1_UNORM 71 +#define IL_DXGI_BC1_UNORM_SRGB 72 +#define IL_DXGI_BC2_TYPELESS 73 +#define IL_DXGI_BC2_UNORM 74 +#define IL_DXGI_BC2_UNORM_SRGB 75 +#define IL_DXGI_BC3_TYPELESS 76 +#define IL_DXGI_BC3_UNORM 77 +#define IL_DXGI_BC3_UNORM_SRGB 78 +#define IL_DXGI_BC4_TYPELESS 79 +#define IL_DXGI_BC4_UNORM 80 +#define IL_DXGI_BC4_SNORM 81 +#define IL_DXGI_BC5_TYPELESS 82 +#define IL_DXGI_BC5_UNORM 83 +#define IL_DXGI_BC5_SNORM 84 +#define IL_DXGI_B5G6R5_UNORM 85 +#define IL_DXGI_B5G5R5A1_UNORM 86 +#define IL_DXGI_B8G8R8A8_UNORM 87 +#define IL_DXGI_B8G8R8X8_UNORM 88 +#define IL_DXGI_R10G10B10_XR_BIAS_A2_UNORM 89 +#define IL_DXGI_B8G8R8A8_TYPELESS 90 +#define IL_DXGI_B8G8R8A8_UNORM_SRGB 91 +#define IL_DXGI_B8G8R8X8_TYPELESS 92 +#define IL_DXGI_B8G8R8X8_UNORM_SRGB 93 +#define IL_DXGI_BC6H_TYPELESS 94 +#define IL_DXGI_BC6H_UF16 95 +#define IL_DXGI_BC6H_SF16 96 +#define IL_DXGI_BC7_TYPELESS 97 +#define IL_DXGI_BC7_UNORM 98 +#define IL_DXGI_BC7_UNORM_SRGB 99 +#define IL_DXGI_AYUV 100 +#define IL_DXGI_Y410 101 +#define IL_DXGI_Y416 102 +#define IL_DXGI_NV12 103 +#define IL_DXGI_P010 104 +#define IL_DXGI_P016 105 +#define IL_DXGI_420_OPAQUE 106 +#define IL_DXGI_YUY2 107 +#define IL_DXGI_Y210 108 +#define IL_DXGI_Y216 109 +#define IL_DXGI_NV11 110 +#define IL_DXGI_AI44 111 +#define IL_DXGI_IA44 112 +#define IL_DXGI_P8 113 +#define IL_DXGI_A8P8 114 +#define IL_DXGI_B4G4R4A4_UNORM 115 +#define IL_DXGI_P208 130 +#define IL_DXGI_V208 131 +#define IL_DXGI_V408 132 + // Environment map definitions #define IL_CUBEMAP_POSITIVEX 0x00000400 
#define IL_CUBEMAP_NEGATIVEX 0x00000800 diff --git a/DevIL/src-IL/include/bc7decomp.h b/DevIL/src-IL/include/bc7decomp.h new file mode 100644 index 00000000..efa5a6b6 --- /dev/null +++ b/DevIL/src-IL/include/bc7decomp.h @@ -0,0 +1,177 @@ +#pragma once + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4201) // nonstandard extension used: nameless struct/union +#endif + +#include "il_internal.h" + +#include + +namespace bc7decomp +{ + +enum eNoClamp { cNoClamp }; + +template inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); } + +class color_rgba +{ +public: + union + { + uint8_t m_comps[4]; + + struct + { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + }; + }; + + inline color_rgba() + { + static_assert(sizeof(*this) == 4, "sizeof(*this) != 4"); + } + + inline color_rgba(int y) + { + set(y); + } + + inline color_rgba(int y, int na) + { + set(y, na); + } + + inline color_rgba(int sr, int sg, int sb, int sa) + { + set(sr, sg, sb, sa); + } + + inline color_rgba(eNoClamp, int sr, int sg, int sb, int sa) + { + set_noclamp_rgba((uint8_t)sr, (uint8_t)sg, (uint8_t)sb, (uint8_t)sa); + } + + inline color_rgba& set_noclamp_y(int y) + { + m_comps[0] = (uint8_t)y; + m_comps[1] = (uint8_t)y; + m_comps[2] = (uint8_t)y; + m_comps[3] = (uint8_t)255; + return *this; + } + + inline color_rgba &set_noclamp_rgba(int sr, int sg, int sb, int sa) + { + m_comps[0] = (uint8_t)sr; + m_comps[1] = (uint8_t)sg; + m_comps[2] = (uint8_t)sb; + m_comps[3] = (uint8_t)sa; + return *this; + } + + inline color_rgba &set(int y) + { + m_comps[0] = static_cast(clamp(y, 0, 255)); + m_comps[1] = m_comps[0]; + m_comps[2] = m_comps[0]; + m_comps[3] = 255; + return *this; + } + + inline color_rgba &set(int y, int na) + { + m_comps[0] = static_cast(clamp(y, 0, 255)); + m_comps[1] = m_comps[0]; + m_comps[2] = m_comps[0]; + m_comps[3] = static_cast(clamp(na, 0, 255)); + return *this; + } + + inline color_rgba &set(int sr, int sg, int sb, 
int sa) + { + m_comps[0] = static_cast(clamp(sr, 0, 255)); + m_comps[1] = static_cast(clamp(sg, 0, 255)); + m_comps[2] = static_cast(clamp(sb, 0, 255)); + m_comps[3] = static_cast(clamp(sa, 0, 255)); + return *this; + } + + inline color_rgba &set_rgb(int sr, int sg, int sb) + { + m_comps[0] = static_cast(clamp(sr, 0, 255)); + m_comps[1] = static_cast(clamp(sg, 0, 255)); + m_comps[2] = static_cast(clamp(sb, 0, 255)); + return *this; + } + + inline color_rgba &set_rgb(const color_rgba &other) + { + r = other.r; + g = other.g; + b = other.b; + return *this; + } + + inline const uint8_t &operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; } + inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; } + + inline void clear() + { + m_comps[0] = 0; + m_comps[1] = 0; + m_comps[2] = 0; + m_comps[3] = 0; + } + + inline bool operator== (const color_rgba &rhs) const + { + if (m_comps[0] != rhs.m_comps[0]) return false; + if (m_comps[1] != rhs.m_comps[1]) return false; + if (m_comps[2] != rhs.m_comps[2]) return false; + if (m_comps[3] != rhs.m_comps[3]) return false; + return true; + } + + inline bool operator!= (const color_rgba &rhs) const + { + return !(*this == rhs); + } + + inline bool operator<(const color_rgba &rhs) const + { + for (int i = 0; i < 4; i++) + { + if (m_comps[i] < rhs.m_comps[i]) + return true; + else if (m_comps[i] != rhs.m_comps[i]) + return false; + } + return false; + } + + inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; } + inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } + inline int get_luma(bool luma_601) const { return luma_601 ? 
get_601_luma() : get_709_luma(); } + + static color_rgba comp_min(const color_rgba& a, const color_rgba& b) { return color_rgba(IL_MIN(a[0], b[0]), IL_MIN(a[1], b[1]), IL_MIN(a[2], b[2]), IL_MIN(a[3], b[3])); } + static color_rgba comp_max(const color_rgba& a, const color_rgba& b) { return color_rgba(IL_MAX(a[0], b[0]), IL_MAX(a[1], b[1]), IL_MAX(a[2], b[2]), IL_MAX(a[3], b[3])); } +}; + +bool unpack_bc7(const void *pBlock, color_rgba *pPixels); + +} // namespace bc7decomp + +namespace bc7decomp_ref +{ + bool unpack_bc7(const void* pBlock, bc7decomp::color_rgba* pPixels); +} // namespace bc7decomp_ref + +#ifdef _MSC_VER +#pragma warning(pop) +#endif \ No newline at end of file diff --git a/DevIL/src-IL/include/il_dds.h b/DevIL/src-IL/include/il_dds.h index d7db8e53..285ece24 100644 --- a/DevIL/src-IL/include/il_dds.h +++ b/DevIL/src-IL/include/il_dds.h @@ -332,6 +332,7 @@ ILboolean AllocImage(ILuint CompFormat, ILboolean IsDXT10); ILboolean DdsDecompress(ILuint CompFormat, ILboolean IsDXT10); ILboolean ReadMipmaps(ILuint CompFormat, ILboolean IsDXT10); ILuint DecodePixelFormat(ILuint *CompFormat); +ILuint GetDX10BlockSize(); void DxtcReadColor(ILushort Data, Color8888* Out); void DxtcReadColors(const ILubyte* Data, Color8888* Out); ILboolean DecompressARGB(ILuint CompFormat); @@ -342,6 +343,7 @@ ILboolean DecompressDXT2(ILimage *lImage, ILubyte *lCompData); ILboolean DecompressDXT3(ILimage *lImage, ILubyte *lCompData); ILboolean DecompressDXT4(ILimage *lImage, ILubyte *lCompData); ILboolean DecompressDXT5(ILimage *lImage, ILubyte *lCompData); +ILboolean DecompressBC7UNORM(ILimage* lImage, ILubyte* lCompData); ILboolean Decompress3Dc(); ILboolean DecompressAti1n(); ILboolean DecompressRXGB(); diff --git a/DevIL/src-IL/src/bc7decomp.cpp b/DevIL/src-IL/src/bc7decomp.cpp new file mode 100644 index 00000000..7825101e --- /dev/null +++ b/DevIL/src-IL/src/bc7decomp.cpp @@ -0,0 +1,679 @@ +// File: bc7decomp.c - Richard Geldreich, Jr. 
3/31/2020 - MIT license or public domain (see end of file) +#include "bc7decomp.h" +#include + +#if (defined(_M_AMD64) || defined(_M_X64) || defined(__SSE2__)) +# define BC7DECOMP_USE_SSE2 +#endif + +#ifdef BC7DECOMP_USE_SSE2 +#include +#include +#endif + +namespace bc7decomp +{ + +#ifdef BC7DECOMP_USE_SSE2 + const __m128i g_bc7_weights4_sse2[8] = + { + _mm_set_epi16(4, 4, 4, 4, 0, 0, 0, 0), + _mm_set_epi16(13, 13, 13, 13, 9, 9, 9, 9), + _mm_set_epi16(21, 21, 21, 21, 17, 17, 17, 17), + _mm_set_epi16(30, 30, 30, 30, 26, 26, 26, 26), + _mm_set_epi16(38, 38, 38, 38, 34, 34, 34, 34), + _mm_set_epi16(47, 47, 47, 47, 43, 43, 43, 43), + _mm_set_epi16(55, 55, 55, 55, 51, 51, 51, 51), + _mm_set_epi16(64, 64, 64, 64, 60, 60, 60, 60), + }; +#endif + +const uint32_t g_bc7_weights2[4] = { 0, 21, 43, 64 }; +const uint32_t g_bc7_weights3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 }; +const uint32_t g_bc7_weights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; + +const uint8_t g_bc7_partition2[64 * 16] = +{ + 0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1, 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, 0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1, 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1, 0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1, 0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1, 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1, + 0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,1, 0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0, 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0, 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1, + 0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0, 0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0, 0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0, 0,0,0,1,0,1,1,1,1,1,1,0,1,0,0,0, 
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0, 0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0, 0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0, + 0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1, 0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, 0,1,0,1,1,0,1,0,0,1,0,1,1,0,1,0, 0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, 0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0, 0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, 0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1, 0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1, + 0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, 0,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0, 0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,0, 0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0, 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, 0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,1, 0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,1, 0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0, + 0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0, 0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0, 0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0, 0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,0, 0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0, + 0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1, 0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1, 0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,1, 0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1, 0,0,1,1,0,0,1,1,1,1,1,1,0,0,0,0, 0,0,1,0,0,0,1,0,1,1,1,0,1,1,1,0, 0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1 +}; + +const uint8_t g_bc7_partition3[64 * 16] = +{ + 0,0,1,1,0,0,1,1,0,2,2,1,2,2,2,2, 0,0,0,1,0,0,1,1,2,2,1,1,2,2,2,1, 0,0,0,0,2,0,0,1,2,2,1,1,2,2,1,1, 0,2,2,2,0,0,2,2,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2, 0,0,1,1,0,0,1,1,0,0,2,2,0,0,2,2, 0,0,2,2,0,0,2,2,1,1,1,1,1,1,1,1, 0,0,1,1,0,0,1,1,2,2,1,1,2,2,1,1, + 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2, 0,0,1,2,0,0,1,2,0,0,1,2,0,0,1,2, 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2, 0,1,2,2,0,1,2,2,0,1,2,2,0,1,2,2, 0,0,1,1,0,1,1,2,1,1,2,2,1,2,2,2, 0,0,1,1,2,0,0,1,2,2,0,0,2,2,2,0, + 0,0,0,1,0,0,1,1,0,1,1,2,1,1,2,2, 0,1,1,1,0,0,1,1,2,0,0,1,2,2,0,0, 0,0,0,0,1,1,2,2,1,1,2,2,1,1,2,2, 0,0,2,2,0,0,2,2,0,0,2,2,1,1,1,1, 0,1,1,1,0,1,1,1,0,2,2,2,0,2,2,2, 0,0,0,1,0,0,0,1,2,2,2,1,2,2,2,1, 0,0,0,0,0,0,1,1,0,1,2,2,0,1,2,2, 
0,0,0,0,1,1,0,0,2,2,1,0,2,2,1,0, + 0,1,2,2,0,1,2,2,0,0,1,1,0,0,0,0, 0,0,1,2,0,0,1,2,1,1,2,2,2,2,2,2, 0,1,1,0,1,2,2,1,1,2,2,1,0,1,1,0, 0,0,0,0,0,1,1,0,1,2,2,1,1,2,2,1, 0,0,2,2,1,1,0,2,1,1,0,2,0,0,2,2, 0,1,1,0,0,1,1,0,2,0,0,2,2,2,2,2, 0,0,1,1,0,1,2,2,0,1,2,2,0,0,1,1, 0,0,0,0,2,0,0,0,2,2,1,1,2,2,2,1, + 0,0,0,0,0,0,0,2,1,1,2,2,1,2,2,2, 0,2,2,2,0,0,2,2,0,0,1,2,0,0,1,1, 0,0,1,1,0,0,1,2,0,0,2,2,0,2,2,2, 0,1,2,0,0,1,2,0,0,1,2,0,0,1,2,0, 0,0,0,0,1,1,1,1,2,2,2,2,0,0,0,0, 0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0, 0,1,2,0,2,0,1,2,1,2,0,1,0,1,2,0, 0,0,1,1,2,2,0,0,1,1,2,2,0,0,1,1, + 0,0,1,1,1,1,2,2,2,2,0,0,0,0,1,1, 0,1,0,1,0,1,0,1,2,2,2,2,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,2,1,2,1,2,1, 0,0,2,2,1,1,2,2,0,0,2,2,1,1,2,2, 0,0,2,2,0,0,1,1,0,0,2,2,0,0,1,1, 0,2,2,0,1,2,2,1,0,2,2,0,1,2,2,1, 0,1,0,1,2,2,2,2,2,2,2,2,0,1,0,1, 0,0,0,0,2,1,2,1,2,1,2,1,2,1,2,1, + 0,1,0,1,0,1,0,1,0,1,0,1,2,2,2,2, 0,2,2,2,0,1,1,1,0,2,2,2,0,1,1,1, 0,0,0,2,1,1,1,2,0,0,0,2,1,1,1,2, 0,0,0,0,2,1,1,2,2,1,1,2,2,1,1,2, 0,2,2,2,0,1,1,1,0,1,1,1,0,2,2,2, 0,0,0,2,1,1,1,2,1,1,1,2,0,0,0,2, 0,1,1,0,0,1,1,0,0,1,1,0,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,1,2,2,1,1,2, + 0,1,1,0,0,1,1,0,2,2,2,2,2,2,2,2, 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2, 0,0,2,2,1,1,2,2,1,1,2,2,0,0,2,2, 0,0,0,0,0,0,0,0,0,0,0,0,2,1,1,2, 0,0,0,2,0,0,0,1,0,0,0,2,0,0,0,1, 0,2,2,2,1,2,2,2,0,2,2,2,1,2,2,2, 0,1,0,1,2,2,2,2,2,2,2,2,2,2,2,2, 0,1,1,1,2,0,1,1,2,2,0,1,2,2,2,0, +}; + +const uint8_t g_bc7_table_anchor_index_second_subset[64] = { 15,15,15,15,15,15,15,15, 15,15,15,15,15,15,15,15, 15, 2, 8, 2, 2, 8, 8,15, 2, 8, 2, 2, 8, 8, 2, 2, 15,15, 6, 8, 2, 8,15,15, 2, 8, 2, 2, 2,15,15, 6, 6, 2, 6, 8,15,15, 2, 2, 15,15,15,15,15, 2, 2,15 }; + +const uint8_t g_bc7_table_anchor_index_third_subset_1[64] = +{ + 3, 3,15,15, 8, 3,15,15, 8, 8, 6, 6, 6, 5, 3, 3, 3, 3, 8,15, 3, 3, 6,10, 5, 8, 8, 6, 8, 5,15,15, 8,15, 3, 5, 6,10, 8,15, 15, 3,15, 5,15,15,15,15, 3,15, 5, 5, 5, 8, 5,10, 5,10, 8,13,15,12, 3, 3 +}; + +const uint8_t g_bc7_table_anchor_index_third_subset_2[64] = +{ + 15, 8, 8, 3,15,15, 3, 8, 
15,15,15,15,15,15,15, 8, 15, 8,15, 3,15, 8,15, 8, 3,15, 6,10,15,15,10, 8, 15, 3,15,10,10, 8, 9,10, 6,15, 8,15, 3, 6, 6, 8, 15, 3,15,15,15,15,15,15, 15,15,15,15, 3,15,15, 8 +}; + +const uint8_t g_bc7_first_byte_to_mode[256] = +{ + 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +}; + +inline void insert_weight_zero(uint64_t& index_bits, uint32_t bits_per_index, uint32_t offset) +{ + uint64_t LOW_BIT_MASK = (static_cast(1) << ((bits_per_index * (offset + 1)) - 1)) - 1; + uint64_t HIGH_BIT_MASK = ~LOW_BIT_MASK; + + index_bits = ((index_bits & HIGH_BIT_MASK) << 1) | (index_bits & LOW_BIT_MASK); +} + +// BC7 mode 0-7 decompression. +// Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3 subset, 2 subset, 1 subset, and dual plane modes together into simple shared routines. 
+ +static inline uint32_t bc7_dequant(uint32_t val, uint32_t pbit, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(pbit < 2); assert(val_bits >= 4 && val_bits <= 8); const uint32_t total_bits = val_bits + 1; val = (val << 1) | pbit; val <<= (8 - total_bits); val |= (val >> total_bits); assert(val <= 255); return val; } +static inline uint32_t bc7_dequant(uint32_t val, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(val_bits >= 4 && val_bits <= 8); val <<= (8 - val_bits); val |= (val >> val_bits); assert(val <= 255); return val; } + +static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w) { assert(w < 4); return (l * (64 - g_bc7_weights2[w]) + h * g_bc7_weights2[w] + 32) >> 6; } +static inline uint32_t bc7_interp3(uint32_t l, uint32_t h, uint32_t w) { assert(w < 8); return (l * (64 - g_bc7_weights3[w]) + h * g_bc7_weights3[w] + 32) >> 6; } +static inline uint32_t bc7_interp4(uint32_t l, uint32_t h, uint32_t w) { assert(w < 16); return (l * (64 - g_bc7_weights4[w]) + h * g_bc7_weights4[w] + 32) >> 6; } +static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t bits) +{ + assert(l <= 255 && h <= 255); + switch (bits) + { + case 2: return bc7_interp2(l, h, w); + case 3: return bc7_interp3(l, h, w); + case 4: return bc7_interp4(l, h, w); + default: + break; + } + return 0; +} + + +#ifdef BC7DECOMP_USE_SSE2 +static inline __m128i bc7_interp_sse2(__m128i l, __m128i h, __m128i w, __m128i iw) +{ + return _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(l, iw), _mm_mullo_epi16(h, w)), _mm_set1_epi16(32)), 6); +} + +static inline void bc7_interp2_sse2(const color_rgba* endpoint_pair, color_rgba* out_colors) +{ + __m128i endpoints = _mm_loadu_si64(endpoint_pair); + __m128i endpoints_16 = _mm_unpacklo_epi8(endpoints, _mm_setzero_si128()); + + __m128i endpoints_16_swapped = _mm_shuffle_epi32(endpoints_16, _MM_SHUFFLE(1, 0, 3, 2)); + + // Interpolated colors will be color 1 and 2 + __m128i interpolated_colors = 
bc7_interp_sse2(endpoints_16, endpoints_16_swapped, _mm_set1_epi16(21), _mm_set1_epi16(43)); + + // all_colors will be 1, 2, 0, 3 + __m128i all_colors = _mm_packus_epi16(interpolated_colors, endpoints_16); + + all_colors = _mm_shuffle_epi32(all_colors, _MM_SHUFFLE(3, 1, 0, 2)); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(out_colors), all_colors); +} + +static inline void bc7_interp3_sse2(const color_rgba* endpoint_pair, color_rgba* out_colors) +{ + __m128i endpoints = _mm_loadu_si64(endpoint_pair); + __m128i endpoints_16bit = _mm_unpacklo_epi8(endpoints, _mm_setzero_si128()); + __m128i endpoints_16bit_swapped = _mm_shuffle_epi32(endpoints_16bit, _MM_SHUFFLE(1, 0, 3, 2)); + + __m128i interpolated_16 = bc7_interp_sse2(endpoints_16bit, endpoints_16bit_swapped, _mm_set1_epi16(9), _mm_set1_epi16(55)); + __m128i interpolated_23 = bc7_interp_sse2(endpoints_16bit, endpoints_16bit_swapped, _mm_set_epi16(37, 37, 37, 37, 18, 18, 18, 18), _mm_set_epi16(27, 27, 27, 27, 46, 46, 46, 46)); + __m128i interpolated_45 = bc7_interp_sse2(endpoints_16bit, endpoints_16bit_swapped, _mm_set_epi16(18, 18, 18, 18, 37, 37, 37, 37), _mm_set_epi16(46, 46, 46, 46, 27, 27, 27, 27)); + + __m128i interpolated_01 = _mm_unpacklo_epi64(endpoints_16bit, interpolated_16); + __m128i interpolated_67 = _mm_unpackhi_epi64(interpolated_16, endpoints_16bit); + + __m128i all_colors_0 = _mm_packus_epi16(interpolated_01, interpolated_23); + __m128i all_colors_1 = _mm_packus_epi16(interpolated_45, interpolated_67); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(out_colors), all_colors_0); + _mm_storeu_si128(reinterpret_cast<__m128i*>(out_colors + 4), all_colors_1); +} +#endif + +bool unpack_bc7_mode0_2(uint32_t mode, const uint64_t* data_chunks, color_rgba* pPixels) +{ + //const uint32_t SUBSETS = 3; + const uint32_t ENDPOINTS = 6; + const uint32_t COMPS = 3; + const uint32_t WEIGHT_BITS = (mode == 0) ? 
3 : 2; + const uint32_t WEIGHT_MASK = (1 << WEIGHT_BITS) - 1; + const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5; + const uint32_t ENDPOINT_MASK = (1 << ENDPOINT_BITS) - 1; + const uint32_t PBITS = (mode == 0) ? 6 : 0; + const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + const uint32_t PART_BITS = (mode == 0) ? 4 : 6; + const uint32_t PART_MASK = (1 << PART_BITS) - 1; + + const uint64_t low_chunk = data_chunks[0]; + const uint64_t high_chunk = data_chunks[1]; + + const uint32_t part = (low_chunk >> (mode + 1)) & PART_MASK; + + uint64_t channel_read_chunks[3] = { 0, 0, 0 }; + + if (mode == 0) + { + channel_read_chunks[0] = low_chunk >> 5; + channel_read_chunks[1] = low_chunk >> 29; + channel_read_chunks[2] = ((low_chunk >> 53) | (high_chunk << 11)); + } + else + { + channel_read_chunks[0] = low_chunk >> 9; + channel_read_chunks[1] = ((low_chunk >> 39) | (high_chunk << 25)); + channel_read_chunks[2] = high_chunk >> 5; + } + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + { + uint64_t channel_read_chunk = channel_read_chunks[c]; + for (uint32_t e = 0; e < ENDPOINTS; e++) + { + endpoints[e][c] = static_cast(channel_read_chunk & ENDPOINT_MASK); + channel_read_chunk >>= ENDPOINT_BITS; + } + } + + uint32_t pbits[6]; + if (mode == 0) + { + uint8_t p_bits_chunk = static_cast((high_chunk >> 13) & 0xff); + + for (uint32_t p = 0; p < PBITS; p++) + pbits[p] = (p_bits_chunk >> p) & 1; + } + + uint64_t weights_read_chunk = high_chunk >> (67 - 16 * WEIGHT_BITS); + insert_weight_zero(weights_read_chunk, WEIGHT_BITS, 0); + insert_weight_zero(weights_read_chunk, WEIGHT_BITS, IL_MIN(g_bc7_table_anchor_index_third_subset_1[part], g_bc7_table_anchor_index_third_subset_2[part])); + insert_weight_zero(weights_read_chunk, WEIGHT_BITS, IL_MAX(g_bc7_table_anchor_index_third_subset_1[part], g_bc7_table_anchor_index_third_subset_2[part])); + + uint32_t weights[16]; + for (uint32_t i = 0; i < 16; i++) + { + weights[i] = static_cast(weights_read_chunk & WEIGHT_MASK); 
+ weights_read_chunk >>= WEIGHT_BITS; + } + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = static_cast((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS))); + + color_rgba block_colors[3][8]; + +#ifdef BC7DECOMP_USE_SSE2 + for (uint32_t s = 0; s < 3; s++) + { + if (WEIGHT_BITS == 2) + bc7_interp2_sse2(endpoints + s * 2, block_colors[s]); + else + bc7_interp3_sse2(endpoints + s * 2, block_colors[s]); + } +#else + for (uint32_t s = 0; s < 3; s++) + for (uint32_t i = 0; i < WEIGHT_VALS; i++) + { + for (uint32_t c = 0; c < 3; c++) + block_colors[s][i][c] = static_cast(bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS)); + block_colors[s][i][3] = 255; + } +#endif + + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = block_colors[g_bc7_partition3[part * 16 + i]][weights[i]]; + + return true; +} + +bool unpack_bc7_mode1_3_7(uint32_t mode, const uint64_t* data_chunks, color_rgba* pPixels) +{ + //const uint32_t SUBSETS = 2; + const uint32_t ENDPOINTS = 4; + const uint32_t COMPS = (mode == 7) ? 4 : 3; + const uint32_t WEIGHT_BITS = (mode == 1) ? 3 : 2; + const uint32_t WEIGHT_MASK = (1 << WEIGHT_BITS) - 1; + const uint32_t ENDPOINT_BITS = (mode == 7) ? 5 : ((mode == 1) ? 6 : 7); + const uint32_t ENDPOINT_MASK = (1 << ENDPOINT_BITS) - 1; + const uint32_t PBITS = (mode == 1) ? 2 : 4; + const uint32_t SHARED_PBITS = (mode == 1) ? 
true : false; + const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + + const uint64_t low_chunk = data_chunks[0]; + const uint64_t high_chunk = data_chunks[1]; + + const uint32_t part = ((low_chunk >> (mode + 1)) & 0x3f); + + color_rgba endpoints[ENDPOINTS]; + + uint64_t channel_read_chunks[4] = { 0, 0, 0, 0 }; + uint64_t p_read_chunk = 0; + channel_read_chunks[0] = (low_chunk >> (mode + 7)); + uint64_t weight_read_chunk; + + switch (mode) + { + case 1: + channel_read_chunks[1] = (low_chunk >> 32); + channel_read_chunks[2] = ((low_chunk >> 56) | (high_chunk << 8)); + p_read_chunk = high_chunk >> 16; + weight_read_chunk = high_chunk >> 18; + break; + case 3: + channel_read_chunks[1] = ((low_chunk >> 38) | (high_chunk << 26)); + channel_read_chunks[2] = high_chunk >> 2; + p_read_chunk = high_chunk >> 30; + weight_read_chunk = high_chunk >> 34; + break; + case 7: + channel_read_chunks[1] = low_chunk >> 34; + channel_read_chunks[2] = ((low_chunk >> 54) | (high_chunk << 10)); + channel_read_chunks[3] = high_chunk >> 10; + p_read_chunk = (high_chunk >> 30); + weight_read_chunk = (high_chunk >> 34); + break; + default: + return false; + }; + + for (uint32_t c = 0; c < COMPS; c++) + { + uint64_t channel_read_chunk = channel_read_chunks[c]; + for (uint32_t e = 0; e < ENDPOINTS; e++) + { + endpoints[e][c] = static_cast(channel_read_chunk & ENDPOINT_MASK); + channel_read_chunk >>= ENDPOINT_BITS; + } + } + + uint32_t pbits[4]; + for (uint32_t p = 0; p < PBITS; p++) + pbits[p] = (p_read_chunk >> p) & 1; + + insert_weight_zero(weight_read_chunk, WEIGHT_BITS, 0); + insert_weight_zero(weight_read_chunk, WEIGHT_BITS, g_bc7_table_anchor_index_second_subset[part]); + + uint32_t weights[16]; + for (uint32_t i = 0; i < 16; i++) + { + weights[i] = static_cast(weight_read_chunk & WEIGHT_MASK); + weight_read_chunk >>= WEIGHT_BITS; + } + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = static_cast((mode != 7U && c == 3U) ? 
255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS)); + + color_rgba block_colors[2][8]; +#ifdef BC7DECOMP_USE_SSE2 + for (uint32_t s = 0; s < 2; s++) + { + if (WEIGHT_BITS == 2) + bc7_interp2_sse2(endpoints + s * 2, block_colors[s]); + else + bc7_interp3_sse2(endpoints + s * 2, block_colors[s]); + } +#else + for (uint32_t s = 0; s < 2; s++) + for (uint32_t i = 0; i < WEIGHT_VALS; i++) + { + for (uint32_t c = 0; c < COMPS; c++) + block_colors[s][i][c] = static_cast(bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS)); + block_colors[s][i][3] = (COMPS == 3) ? 255 : block_colors[s][i][3]; + } +#endif + + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = block_colors[g_bc7_partition2[part * 16 + i]][weights[i]]; + + return true; +} + +bool unpack_bc7_mode4_5(uint32_t mode, const uint64_t* data_chunks, color_rgba* pPixels) +{ + const uint32_t ENDPOINTS = 2; + //const uint32_t COMPS = 4; + const uint32_t WEIGHT_BITS = 2; + const uint32_t WEIGHT_MASK = (1 << WEIGHT_BITS) - 1; + const uint32_t A_WEIGHT_BITS = (mode == 4) ? 3 : 2; + const uint32_t A_WEIGHT_MASK = (1 << A_WEIGHT_BITS) - 1; + const uint32_t ENDPOINT_BITS = (mode == 4) ? 5 : 7; + const uint32_t ENDPOINT_MASK = (1 << ENDPOINT_BITS) - 1; + const uint32_t A_ENDPOINT_BITS = (mode == 4) ? 6 : 8; + const uint32_t A_ENDPOINT_MASK = (1 << A_ENDPOINT_BITS) - 1; + //const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + //const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS; + + const uint64_t low_chunk = data_chunks[0]; + const uint64_t high_chunk = data_chunks[1]; + + const uint32_t comp_rot = (low_chunk >> (mode + 1)) & 0x3; + const uint32_t index_mode = (mode == 4) ? 
static_cast((low_chunk >> 7) & 1) : 0; + + uint64_t color_read_bits = low_chunk >> 8; + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < 3; c++) + { + for (uint32_t e = 0; e < ENDPOINTS; e++) + { + endpoints[e][c] = static_cast(color_read_bits & ENDPOINT_MASK); + color_read_bits >>= ENDPOINT_BITS; + } + } + + endpoints[0][3] = static_cast(color_read_bits & ENDPOINT_MASK); + + uint64_t rgb_weights_chunk; + uint64_t a_weights_chunk; + if (mode == 4) + { + endpoints[0][3] = static_cast(color_read_bits & A_ENDPOINT_MASK); + endpoints[1][3] = static_cast((color_read_bits >> A_ENDPOINT_BITS) & A_ENDPOINT_MASK); + rgb_weights_chunk = ((low_chunk >> 50) | (high_chunk << 14)); + a_weights_chunk = high_chunk >> 17; + } + else if (mode == 5) + { + endpoints[0][3] = static_cast(color_read_bits & A_ENDPOINT_MASK); + endpoints[1][3] = static_cast(((low_chunk >> 58) | (high_chunk << 6)) & A_ENDPOINT_MASK); + rgb_weights_chunk = high_chunk >> 2; + a_weights_chunk = high_chunk >> 33; + } + else + return false; + + insert_weight_zero(rgb_weights_chunk, WEIGHT_BITS, 0); + insert_weight_zero(a_weights_chunk, A_WEIGHT_BITS, 0); + + const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS }; + const uint32_t weight_mask[2] = { index_mode ? A_WEIGHT_MASK : WEIGHT_MASK, index_mode ? WEIGHT_MASK : A_WEIGHT_MASK }; + + uint32_t weights[16], a_weights[16]; + + if (index_mode) + IL_SWAP(uint64_t, rgb_weights_chunk, a_weights_chunk); + + for (uint32_t i = 0; i < 16; i++) + { + weights[i] = (rgb_weights_chunk & weight_mask[0]); + rgb_weights_chunk >>= weight_bits[0]; + } + + for (uint32_t i = 0; i < 16; i++) + { + a_weights[i] = (a_weights_chunk & weight_mask[1]); + a_weights_chunk >>= weight_bits[1]; + } + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = static_cast(bc7_dequant(endpoints[e][c], (c == 3) ? 
A_ENDPOINT_BITS : ENDPOINT_BITS)); + + color_rgba block_colors[8]; +#ifdef BC7DECOMP_USE_SSE2 + if (weight_bits[0] == 3) + bc7_interp3_sse2(endpoints, block_colors); + else + bc7_interp2_sse2(endpoints, block_colors); +#else + for (uint32_t i = 0; i < (1U << weight_bits[0]); i++) + for (uint32_t c = 0; c < 3; c++) + block_colors[i][c] = static_cast(bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0])); +#endif + + for (uint32_t i = 0; i < (1U << weight_bits[1]); i++) + block_colors[i][3] = static_cast(bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1])); + + for (uint32_t i = 0; i < 16; i++) + { + pPixels[i] = block_colors[weights[i]]; + pPixels[i].a = block_colors[a_weights[i]].a; + if (comp_rot >= 1) + IL_SWAP(uint8_t, pPixels[i].a, pPixels[i].m_comps[comp_rot - 1]); + } + + return true; +} + +struct bc7_mode_6 +{ + struct + { + uint64_t m_mode : 7; + uint64_t m_r0 : 7; + uint64_t m_r1 : 7; + uint64_t m_g0 : 7; + uint64_t m_g1 : 7; + uint64_t m_b0 : 7; + uint64_t m_b1 : 7; + uint64_t m_a0 : 7; + uint64_t m_a1 : 7; + uint64_t m_p0 : 1; + } m_lo; + + union + { + struct + { + uint64_t m_p1 : 1; + uint64_t m_s00 : 3; + uint64_t m_s10 : 4; + uint64_t m_s20 : 4; + uint64_t m_s30 : 4; + + uint64_t m_s01 : 4; + uint64_t m_s11 : 4; + uint64_t m_s21 : 4; + uint64_t m_s31 : 4; + + uint64_t m_s02 : 4; + uint64_t m_s12 : 4; + uint64_t m_s22 : 4; + uint64_t m_s32 : 4; + + uint64_t m_s03 : 4; + uint64_t m_s13 : 4; + uint64_t m_s23 : 4; + uint64_t m_s33 : 4; + + } m_hi; + + uint64_t m_hi_bits; + }; +}; + +bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels) +{ + static_assert(sizeof(bc7_mode_6) == 16, "sizeof(bc7_mode_6) == 16"); + + const bc7_mode_6 &block = *static_cast<const bc7_mode_6 *>(pBlock_bits); + + if (block.m_lo.m_mode != (1 << 6)) + return false; + + const uint32_t r0 = static_cast<uint32_t>((block.m_lo.m_r0 << 1) | block.m_lo.m_p0); + const uint32_t g0 = static_cast<uint32_t>((block.m_lo.m_g0 << 1) | block.m_lo.m_p0); + const uint32_t b0 = 
static_cast<uint32_t>((block.m_lo.m_b0 << 1) | block.m_lo.m_p0); + const uint32_t a0 = static_cast<uint32_t>((block.m_lo.m_a0 << 1) | block.m_lo.m_p0); + const uint32_t r1 = static_cast<uint32_t>((block.m_lo.m_r1 << 1) | block.m_hi.m_p1); + const uint32_t g1 = static_cast<uint32_t>((block.m_lo.m_g1 << 1) | block.m_hi.m_p1); + const uint32_t b1 = static_cast<uint32_t>((block.m_lo.m_b1 << 1) | block.m_hi.m_p1); + const uint32_t a1 = static_cast<uint32_t>((block.m_lo.m_a1 << 1) | block.m_hi.m_p1); + + color_rgba vals[16]; +#ifdef BC7DECOMP_USE_SSE2 + __m128i vep0 = _mm_set_epi16((short)a0, (short)b0, (short)g0, (short)r0, (short)a0, (short)b0, (short)g0, (short)r0); + __m128i vep1 = _mm_set_epi16((short)a1, (short)b1, (short)g1, (short)r1, (short)a1, (short)b1, (short)g1, (short)r1); + + for (uint32_t i = 0; i < 16; i += 4) + { + const __m128i w0 = g_bc7_weights4_sse2[i / 4 * 2 + 0]; + const __m128i w1 = g_bc7_weights4_sse2[i / 4 * 2 + 1]; + + const __m128i iw0 = _mm_sub_epi16(_mm_set1_epi16(64), w0); + const __m128i iw1 = _mm_sub_epi16(_mm_set1_epi16(64), w1); + + __m128i first_half = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(vep0, iw0), _mm_mullo_epi16(vep1, w0)), _mm_set1_epi16(32)), 6); + __m128i second_half = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(vep0, iw1), _mm_mullo_epi16(vep1, w1)), _mm_set1_epi16(32)), 6); + __m128i combined = _mm_packus_epi16(first_half, second_half); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(vals + i), combined); + } +#else + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t w = g_bc7_weights4[i]; + const uint32_t iw = 64 - w; + vals[i].set_noclamp_rgba( + (r0 * iw + r1 * w + 32) >> 6, + (g0 * iw + g1 * w + 32) >> 6, + (b0 * iw + b1 * w + 32) >> 6, + (a0 * iw + a1 * w + 32) >> 6); + } +#endif + + pPixels[0] = vals[block.m_hi.m_s00]; + pPixels[1] = vals[block.m_hi.m_s10]; + pPixels[2] = vals[block.m_hi.m_s20]; + pPixels[3] = vals[block.m_hi.m_s30]; + + pPixels[4] = vals[block.m_hi.m_s01]; + pPixels[5] = vals[block.m_hi.m_s11]; + pPixels[6] = 
vals[block.m_hi.m_s21]; + pPixels[7] = vals[block.m_hi.m_s31]; + + pPixels[8] = vals[block.m_hi.m_s02]; + pPixels[9] = vals[block.m_hi.m_s12]; + pPixels[10] = vals[block.m_hi.m_s22]; + pPixels[11] = vals[block.m_hi.m_s32]; + + pPixels[12] = vals[block.m_hi.m_s03]; + pPixels[13] = vals[block.m_hi.m_s13]; + pPixels[14] = vals[block.m_hi.m_s23]; + pPixels[15] = vals[block.m_hi.m_s33]; + + return true; +} + +bool unpack_bc7(const void *pBlock, color_rgba *pPixels) +{ + const uint8_t *block_bytes = static_cast<const uint8_t *>(pBlock); + uint8_t mode = g_bc7_first_byte_to_mode[block_bytes[0]]; + + uint64_t data_chunks[2]; + + uint64_t endian_check = 1; + if (*reinterpret_cast<const uint8_t *>(&endian_check) == 1) + memcpy(data_chunks, pBlock, 16); + else + { + data_chunks[0] = data_chunks[1] = 0; + for (int chunk_index = 0; chunk_index < 2; chunk_index++) + { + for (int byte_index = 0; byte_index < 8; byte_index++) + data_chunks[chunk_index] |= static_cast<uint64_t>(block_bytes[chunk_index * 8 + byte_index]) << (byte_index * 8); + } + } + + switch (mode) + { + case 0: + case 2: + return unpack_bc7_mode0_2(mode, data_chunks, pPixels); + case 1: + case 3: + case 7: + return unpack_bc7_mode1_3_7(mode, data_chunks, pPixels); + case 4: + case 5: + return unpack_bc7_mode4_5(mode, data_chunks, pPixels); + case 6: + return unpack_bc7_mode6(data_chunks, pPixels); + default: + memset(pPixels, 0, sizeof(color_rgba) * 16); + break; + } + + return false; +} + +} // namespace bc7decomp + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright(c) 2020 Richard Geldreich, Jr.
+Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files(the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and / or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions : +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain(www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non - commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain.We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors.We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ + diff --git a/DevIL/src-IL/src/il_dds.cpp b/DevIL/src-IL/src/il_dds.cpp index a1df617b..6606889f 100644 --- a/DevIL/src-IL/src/il_dds.cpp +++ b/DevIL/src-IL/src/il_dds.cpp @@ -24,6 +24,8 @@ // too strictly while reading. +#include "bc7decomp.h" + #include "il_internal.h" #ifndef IL_NO_DDS #include "il_dds.h" @@ -446,11 +448,13 @@ ILboolean iLoadDdsInternal() ilSetError(IL_INVALID_FILE_HEADER); return IL_FALSE; } + BlockSize = GetDX10BlockSize(); CompFormat = HeadDXT10.dxgiFormat; } else { + } // Needed for DXT10? 
@@ -626,6 +630,18 @@ ILuint DecodePixelFormat(ILuint *CompFormat) return BlockSize; } +ILuint GetDX10BlockSize() +{ + ILuint BlockSize = ((Head.Width + 3) / 4) * ((Head.Height + 3) / 4) * Head.Depth; + switch (HeadDXT10.dxgiFormat) + { + case DXGI_FORMAT_BC7_UNORM: /* NOTE(review): DXGI_FORMAT_BC7_TYPELESS is accepted by AllocImage and DdsDecompress but is not sized here -- confirm whether it needs the same 16-byte block size */ + case DXGI_FORMAT_BC7_UNORM_SRGB: + BlockSize *= 16; + } + return BlockSize; +} + // From https://msdn.microsoft.com/en-us/windows/uwp/gaming/complete-code-for-ddstextureloader // @@ -889,6 +905,7 @@ ILboolean AllocImage(ILuint CompFormat, ILboolean IsDXT10) if (!IsDXT10) { + iCurImage->DxgiFormat = IL_DXGI_UNKNOWN; switch (CompFormat) { case PF_RGB: @@ -982,6 +999,22 @@ ILboolean AllocImage(ILuint CompFormat, ILboolean IsDXT10) return IL_FALSE; break; + case DXGI_FORMAT_BC7_TYPELESS: + case DXGI_FORMAT_BC7_UNORM: + case DXGI_FORMAT_BC7_UNORM_SRGB: + if (!ilTexImage(Width, Height, Depth, channels, format, IL_UNSIGNED_BYTE, NULL)) + return IL_FALSE; + if (ilGetInteger(IL_KEEP_DXTC_DATA) == IL_TRUE && CompData) { + iCurImage->DxtcData = (ILubyte*)ialloc(Head.LinearSize); + if (iCurImage->DxtcData == NULL) + return IL_FALSE; + iCurImage->DxtcFormat = IL_DXT_NO_COMP; + iCurImage->DxgiFormat = CompFormat; + iCurImage->DxtcSize = Head.LinearSize; + memcpy(iCurImage->DxtcData, CompData, iCurImage->DxtcSize); + } + break; + default: ilSetError(IL_INVALID_FILE_HEADER); return IL_FALSE; @@ -1011,8 +1044,16 @@ ILboolean AllocImage(ILuint CompFormat, ILboolean IsDXT10) ILboolean DdsDecompress(ILuint CompFormat, ILboolean IsDXT10) { if (IsDXT10) - { //@TODO: Put in compressed formats - return DecompressARGBDX10(CompFormat); + { //@TODO: Put in all compressed formats + switch (CompFormat) + { + case DXGI_FORMAT_BC7_TYPELESS: + case DXGI_FORMAT_BC7_UNORM: + case DXGI_FORMAT_BC7_UNORM_SRGB: + return DecompressBC7UNORM(Image, CompData); + default: + return DecompressARGBDX10(CompFormat); + } } switch (CompFormat) @@ -1077,9 +1118,6 @@ ILboolean ReadMipmaps(ILuint CompFormat, ILboolean IsDXT10) ILboolean isCompressed = 
IL_FALSE; - if (IsDXT10) //@TODO: Add in mipmap support - return IL_TRUE; - Bpp = iCompFormatToBpp(CompFormat); Channels = iCompFormatToChannelCount(CompFormat); Bpc = iCompFormatToBpc(CompFormat); @@ -1092,8 +1130,21 @@ ILboolean ReadMipmaps(ILuint CompFormat, ILboolean IsDXT10) //if (Head.Flags1 & DDS_LINEARSIZE) { // CompFactor = (Width * Height * Depth * Bpp) / Head.LinearSize; //} - switch (CompFormat) + if (IsDXT10) { + switch (CompFormat) + { + case DXGI_FORMAT_BC7_UNORM: + case DXGI_FORMAT_BC7_UNORM_SRGB: + CompFactor = 4; + isCompressed = IL_TRUE; + break; + } + } + else + { + switch (CompFormat) + { case PF_DXT1: // This is officially 6, we have 8 here because DXT1 may contain alpha. CompFactor = 8; @@ -1116,6 +1167,7 @@ ILboolean ReadMipmaps(ILuint CompFormat, ILboolean IsDXT10) break; default: CompFactor = 1; + } } StartImage = Image; @@ -1190,7 +1242,13 @@ ILboolean ReadMipmaps(ILuint CompFormat, ILboolean IsDXT10) Image->DxtcData = (ILubyte*)ialloc(Head.LinearSize); if (Image->DxtcData == NULL) return IL_FALSE; - Image->DxtcFormat = CompFormat - PF_DXT1 + IL_DXT1; + if (IsDXT10) + { + Image->DxgiFormat = CompFormat; + Image->DxtcFormat = IL_DXT_NO_COMP; + } + else + Image->DxtcFormat = CompFormat - PF_DXT1 + IL_DXT1; Image->DxtcSize = Head.LinearSize; memcpy(Image->DxtcData, CompData, Image->DxtcSize); } @@ -1473,6 +1531,32 @@ ILboolean DecompressDXT5(ILimage *lImage, ILubyte *lCompData) return IL_TRUE; } +ILboolean DecompressBC7UNORM(ILimage* lImage, ILubyte* lCompData) +{ + const size_t blocks_x = (lImage->Width + 3) / 4; + const size_t blocks_y = (lImage->Height + 3) / 4; + + for (size_t by = 0; by < blocks_y; by++) + { + for (size_t bx = 0; bx < blocks_x; bx++) + { + void* pBlock = &lCompData[(bx + by * blocks_x) * 16]; + + bc7decomp::color_rgba unpacked_pixels[16]; + for (size_t i = 0; i < 16; i++) + unpacked_pixels[i].set(0, 0, 0, 255); + + bc7decomp::unpack_bc7((const uint8_t*)pBlock, (bc7decomp::color_rgba*)unpacked_pixels); + + size_t copyX = 
IL_MIN(4, lImage->Width - bx * 4); + size_t copyY = IL_MIN(4, lImage->Height - by * 4); + for (size_t y = 0; y < copyY; y++) + memcpy(&lImage->Data[bx * 16 + (by * 4 + y) * lImage->Width * 4], &unpacked_pixels[y * 4], copyX * 4); + } // bx + } // by + + return IL_TRUE; +} ILboolean Decompress3Dc() { diff --git a/DevIL/src-ILUT/src/ilut_opengl.cpp b/DevIL/src-ILUT/src/ilut_opengl.cpp index b7b973a9..9b4ee4c7 100644 --- a/DevIL/src-ILUT/src/ilut_opengl.cpp +++ b/DevIL/src-ILUT/src/ilut_opengl.cpp @@ -84,6 +84,7 @@ void *aglGetProcAddress( const GLubyte *name ) { static ILboolean HasCubemapHardware = IL_FALSE; static ILboolean HasNonPowerOfTwoHardware = IL_FALSE; +static ILboolean HasBTPCHardware = IL_FALSE; #if defined(_WIN32) || defined(_WIN64) || defined(linux) || defined(__APPLE__) ILGLTEXIMAGE3DARBPROC ilGLTexImage3D = NULL; ILGLTEXSUBIMAGE3DARBPROC ilGLTexSubImage3D = NULL; @@ -170,6 +171,8 @@ ILboolean ilutGLInit() HasCubemapHardware = IL_TRUE; if (IsExtensionSupported("GL_ARB_texture_non_power_of_two")) HasNonPowerOfTwoHardware = IL_TRUE; + if (IsExtensionSupported("GL_ARB_texture_compression_bptc")) + HasBTPCHardware = IL_TRUE; return IL_TRUE; } @@ -241,6 +244,20 @@ ILuint GLGetDXTCNum(ILenum DXTCFormat) return DXTCFormat; } +ILuint GLGetDXGINum(ILenum DXGIFormat) +{ + switch (DXGIFormat) + { + case IL_DXGI_BC7_UNORM: + DXGIFormat = 0x8E8C; + break; + case IL_DXGI_BC7_UNORM_SRGB: + DXGIFormat = 0x8E8D; + break; + } + return DXGIFormat; +} + // We assume *all* states have been set by the user, including 2D texturing! 
ILboolean ILAPIENTRY ilutGLTexImage_(GLuint Level, GLuint Target, ILimage *Image) @@ -263,9 +280,23 @@ ILboolean ILAPIENTRY ilutGLTexImage_(GLuint Level, GLuint Target, ILimage *Image if (ilutGetBoolean(ILUT_GL_USE_S3TC) && ilGLCompressed2D != NULL) { if (Image->DxtcData != NULL && Image->DxtcSize != 0) { DXTCFormat = GLGetDXTCNum(Image->DxtcFormat); - ilGLCompressed2D(Target, Level, DXTCFormat, Image->Width, - Image->Height, 0, Image->DxtcSize, Image->DxtcData); - return IL_TRUE; + if (DXTCFormat == IL_DXT_NO_COMP && Image->DxgiFormat != IL_DXGI_UNKNOWN) + { + if (HasBTPCHardware) + { + DXTCFormat = GLGetDXGINum(Image->DxgiFormat); + ilGLCompressed2D(Target, Level, DXTCFormat, Image->Width, + Image->Height, 0, Image->DxtcSize, Image->DxtcData); + return IL_TRUE; + } + // Fallthrough if not supported + } + else + { + ilGLCompressed2D(Target, Level, DXTCFormat, Image->Width, + Image->Height, 0, Image->DxtcSize, Image->DxtcData); + return IL_TRUE; + } } if (ilutGetBoolean(ILUT_GL_GEN_S3TC)) { From 7ae4efa8bcdd5dbe56f03e05f8c315a6e8cec4e7 Mon Sep 17 00:00:00 2001 From: Trist Date: Mon, 17 Apr 2023 10:03:55 +1000 Subject: [PATCH 2/5] Add support for retrieving kept dxtc data. 
--- DevIL/include/IL/il.h | 1 + DevIL/src-IL/src/il_dds-save.cpp | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/DevIL/include/IL/il.h b/DevIL/include/IL/il.h index c3f7e5c8..78a29b46 100644 --- a/DevIL/include/IL/il.h +++ b/DevIL/include/IL/il.h @@ -686,6 +686,7 @@ ILAPI ILboolean ILAPIENTRY ilGetBoolean(ILenum Mode); ILAPI void ILAPIENTRY ilGetBooleanv(ILenum Mode, ILboolean *Param); ILAPI ILubyte* ILAPIENTRY ilGetData(void); ILAPI ILuint ILAPIENTRY ilGetDXTCData(void *Buffer, ILuint BufferSize, ILenum DXTCFormat); +ILAPI ILubyte* ILAPIENTRY ilGetKeptDXTCData(ILuint* DXTCSize, ILenum* DXTCFormat, ILenum* DXGIFormat); ILAPI ILenum ILAPIENTRY ilGetError(void); ILAPI ILint ILAPIENTRY ilGetInteger(ILenum Mode); ILAPI void ILAPIENTRY ilGetIntegerv(ILenum Mode, ILint *Param); diff --git a/DevIL/src-IL/src/il_dds-save.cpp b/DevIL/src-IL/src/il_dds-save.cpp index f17862f3..b0d47170 100644 --- a/DevIL/src-IL/src/il_dds-save.cpp +++ b/DevIL/src-IL/src/il_dds-save.cpp @@ -362,6 +362,13 @@ ILuint ILAPIENTRY ilGetDXTCData(void *Buffer, ILuint BufferSize, ILenum DXTCForm return retVal; } +ILAPI ILubyte* ILAPIENTRY ilGetKeptDXTCData(ILuint* DXTCSize, ILenum* DXTCFormat, ILenum* DXGIFormat) +{ + *DXTCSize = iCurImage->DxtcSize; + *DXTCFormat = iCurImage->DxtcFormat; + *DXGIFormat = iCurImage->DxgiFormat; + return iCurImage->DxtcData; +} // Added the next two functions based on Charles Bloom's rant at // http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html. 
From 584a8011c047b66dc65fc0bc0fc0817f2297602a Mon Sep 17 00:00:00 2001 From: Tristan Lewis Date: Mon, 17 Apr 2023 10:48:47 +1000 Subject: [PATCH 3/5] Fix compilation with jasper 3.0.x --- DevIL/src-IL/src/il_jp2.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/DevIL/src-IL/src/il_jp2.cpp b/DevIL/src-IL/src/il_jp2.cpp index 89075a52..19dfc473 100644 --- a/DevIL/src-IL/src/il_jp2.cpp +++ b/DevIL/src-IL/src/il_jp2.cpp @@ -324,7 +324,11 @@ ILboolean iLoadJp2Internal(jas_stream_t *Stream, ILimage *Image) // see: https://github.com/OSGeo/gdal/commit/9ef8e16e27c5fc4c491debe50bf2b7f3e94ed334 // https://github.com/DentonW/DevIL/issues/90 #if defined(PRIjas_seqent) -static int iJp2_file_read(jas_stream_obj_t *obj, char *buf, unsigned cnt) +#if JAS_VERSION_MAJOR >= 3 +static ssize_t iJp2_file_read(jas_stream_obj_t *obj, char *buf, size_t cnt) +#elif JAS_VERSION_MAJOR < 3 +static int iJp2_file_read(jas_stream_obj_t* obj, char* buf, unsigned cnt) +#endif #else static int iJp2_file_read(jas_stream_obj_t *obj, char *buf, int cnt) #endif @@ -334,7 +338,11 @@ static int iJp2_file_read(jas_stream_obj_t *obj, char *buf, int cnt) } #if defined(JAS_INCLUDE_JP2_CODEC) -static int iJp2_file_write(jas_stream_obj_t *obj, const char *buf, unsigned cnt) +#if JAS_VERSION_MAJOR >= 3 +static ssize_t iJp2_file_write(jas_stream_obj_t* obj, const char* buf, size_t cnt) +#elif JAS_VERSION_MAJOR < 3 /* keep the pre-patch int/unsigned signature for jasper older than 3.0 */ +static int iJp2_file_write(jas_stream_obj_t *obj, const char *buf, unsigned cnt) +#endif #elif defined(PRIjas_seqent) static int iJp2_file_write(jas_stream_obj_t *obj, char *buf, unsigned cnt) #else From 180eb98cc8daef840375e5a8597f4953a775647b Mon Sep 17 00:00:00 2001 From: Tristan Lewis Date: Tue, 25 Jul 2023 14:36:04 +1000 Subject: [PATCH 4/5] Change dxtc data support to retrieve all mipmaps.
--- DevIL/include/IL/il.h | 2 +- DevIL/src-IL/src/il_dds-save.cpp | 23 ++++++++++++++++++++--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/DevIL/include/IL/il.h b/DevIL/include/IL/il.h index 78a29b46..72f39c9b 100644 --- a/DevIL/include/IL/il.h +++ b/DevIL/include/IL/il.h @@ -686,7 +686,7 @@ ILAPI ILboolean ILAPIENTRY ilGetBoolean(ILenum Mode); ILAPI void ILAPIENTRY ilGetBooleanv(ILenum Mode, ILboolean *Param); ILAPI ILubyte* ILAPIENTRY ilGetData(void); ILAPI ILuint ILAPIENTRY ilGetDXTCData(void *Buffer, ILuint BufferSize, ILenum DXTCFormat); -ILAPI ILubyte* ILAPIENTRY ilGetKeptDXTCData(ILuint* DXTCSize, ILenum* DXTCFormat, ILenum* DXGIFormat); +ILAPI void ILAPIENTRY ilGetKeptDXTCData(ILubyte** DXTCData, ILuint* DXTCMipMapCount, ILuint* DXTCSize, ILenum* DXTCFormat, ILenum* DXGIFormat); ILAPI ILenum ILAPIENTRY ilGetError(void); ILAPI ILint ILAPIENTRY ilGetInteger(ILenum Mode); ILAPI void ILAPIENTRY ilGetIntegerv(ILenum Mode, ILint *Param); diff --git a/DevIL/src-IL/src/il_dds-save.cpp b/DevIL/src-IL/src/il_dds-save.cpp index b0d47170..fba42bc7 100644 --- a/DevIL/src-IL/src/il_dds-save.cpp +++ b/DevIL/src-IL/src/il_dds-save.cpp @@ -362,12 +362,29 @@ ILuint ILAPIENTRY ilGetDXTCData(void *Buffer, ILuint BufferSize, ILenum DXTCForm return retVal; } -ILAPI ILubyte* ILAPIENTRY ilGetKeptDXTCData(ILuint* DXTCSize, ILenum* DXTCFormat, ILenum* DXGIFormat) +ILAPI void ILAPIENTRY ilGetKeptDXTCData(ILubyte** DXTCData, ILuint* DXTCMipMapCount, ILuint* DXTCSize, ILenum* DXTCFormat, ILenum* DXGIFormat) { - *DXTCSize = iCurImage->DxtcSize; + ILuint mipMapCount = 1; + ILimage* mipmap = iCurImage; + if (DXTCData == nullptr) + { + while (mipmap->Mipmaps) + { + ++mipMapCount; + mipmap = mipmap->Mipmaps; + } + *DXTCMipMapCount = mipMapCount; + return; + } + *DXTCFormat = iCurImage->DxtcFormat; *DXGIFormat = iCurImage->DxgiFormat; - return iCurImage->DxtcData; + for (int i = 0; i < *DXTCMipMapCount && mipmap; ++i) + { + DXTCData[i] = mipmap->DxtcData; + DXTCSize[i] = 
mipmap->DxtcSize; + mipmap = mipmap->Mipmaps; + } +} // Added the next two functions based on Charles Bloom's rant at From 3215ad7f9ff494d67121341f119c346545b9ac72 Mon Sep 17 00:00:00 2001 From: Tristan Lewis Date: Tue, 1 Aug 2023 16:24:08 +1000 Subject: [PATCH 5/5] Add option to skip dds decompression. --- DevIL/include/IL/il.h | 1 + DevIL/src-IL/include/il_states.h | 1 + DevIL/src-IL/src/il_dds.cpp | 3 +++ DevIL/src-IL/src/il_states.cpp | 10 ++++++++++ 4 files changed, 15 insertions(+) diff --git a/DevIL/include/IL/il.h b/DevIL/include/IL/il.h index 72f39c9b..ad50e3ad 100644 --- a/DevIL/include/IL/il.h +++ b/DevIL/include/IL/il.h @@ -390,6 +390,7 @@ typedef long long unsigned int ILuint64; #define IL_DXT5 0x070A #define IL_DXT_NO_COMP 0x070B #define IL_KEEP_DXTC_DATA 0x070C +#define IL_SKIP_DXTC_DECOMPRESS 0x070D /* NOTE(review): same value as IL_DXTC_DATA_FORMAT on the next line, so the two modes are indistinguishable at run time -- confirm and assign an unused value */ #define IL_DXTC_DATA_FORMAT 0x070D #define IL_3DC 0x070E #define IL_RXGB 0x070F diff --git a/DevIL/src-IL/include/il_states.h b/DevIL/src-IL/include/il_states.h index 4add400a..1a303765 100644 --- a/DevIL/src-IL/include/il_states.h +++ b/DevIL/src-IL/include/il_states.h @@ -58,6 +58,7 @@ typedef struct IL_STATES ILuint ilQuantMaxIndexs; // DXTC states ILboolean ilKeepDxtcData; + ILboolean ilSkipDxtcDecompress; ILboolean ilUseNVidiaDXT; ILboolean ilUseSquishDXT; diff --git a/DevIL/src-IL/src/il_dds.cpp b/DevIL/src-IL/src/il_dds.cpp index 6606889f..714d375b 100644 --- a/DevIL/src-IL/src/il_dds.cpp +++ b/DevIL/src-IL/src/il_dds.cpp @@ -1043,6 +1043,9 @@ ILboolean AllocImage(ILuint CompFormat, ILboolean IsDXT10) */ ILboolean DdsDecompress(ILuint CompFormat, ILboolean IsDXT10) { + if (Image->DxtcData && ilGetInteger(IL_SKIP_DXTC_DECOMPRESS) == IL_TRUE) + return IL_TRUE; + if (IsDXT10) { //@TODO: Put in all compressed formats switch (CompFormat) diff --git a/DevIL/src-IL/src/il_states.cpp b/DevIL/src-IL/src/il_states.cpp index 752624bd..a532ad66 100644 --- a/DevIL/src-IL/src/il_states.cpp +++ b/DevIL/src-IL/src/il_states.cpp @@ -89,6 +89,7 @@ void 
ilDefaultStates() ilStates[ilCurrentPos].ilQuantMaxIndexs = 256; ilStates[ilCurrentPos].ilKeepDxtcData = IL_FALSE; + ilStates[ilCurrentPos].ilSkipDxtcDecompress = IL_FALSE; ilStates[ilCurrentPos].ilUseNVidiaDXT = IL_FALSE; ilStates[ilCurrentPos].ilUseSquishDXT = IL_FALSE; @@ -441,6 +442,9 @@ void ILAPIENTRY ilGetIntegerv(ILenum Mode, ILint *Param) case IL_KEEP_DXTC_DATA: *Param = ilStates[ilCurrentPos].ilKeepDxtcData; break; + case IL_SKIP_DXTC_DECOMPRESS: + *Param = ilStates[ilCurrentPos].ilSkipDxtcDecompress; + break; case IL_ORIGIN_MODE: *Param = ilStates[ilCurrentPos].ilOriginMode; break; @@ -1038,6 +1042,12 @@ void ILAPIENTRY ilSetInteger(ILenum Mode, ILint Param) return; } break; + case IL_SKIP_DXTC_DECOMPRESS: + if (Param == IL_FALSE || Param == IL_TRUE) { + ilStates[ilCurrentPos].ilSkipDxtcDecompress = Param; + return; + } + break; case IL_MAX_QUANT_INDICES: if (Param >= 2 && Param <= 256) { ilStates[ilCurrentPos].ilQuantMaxIndexs = Param;