From b1e866284a5f8d4fcbaeabfdc5a6d3e0a1cd9585 Mon Sep 17 00:00:00 2001 From: EtlamGit Date: Sat, 27 Dec 2025 23:38:24 +0100 Subject: [PATCH 1/5] refactor duplicated zlib code into own function to be called from multiple places other compression methods will also get such functions --- nbt/nbt.cpp | 91 +++++++++++++++++++++++------------------------------ nbt/nbt.h | 5 ++- nbt/tag.cpp | 3 +- 3 files changed, 44 insertions(+), 55 deletions(-) diff --git a/nbt/nbt.cpp b/nbt/nbt.cpp index e66946b9..99275fb8 100644 --- a/nbt/nbt.cpp +++ b/nbt/nbt.cpp @@ -8,80 +8,67 @@ // this handles decoding the gzipped level.dat NBT::NBT(const QString level) - : root(&NBT::Null) // just in case we die + : root(&NBT::Null) // just in case we die, init with empty data { QFile f(level); f.open(QIODevice::ReadOnly); QByteArray data = f.readAll(); f.close(); - QByteArray nbt; - z_stream strm; - static const int CHUNK_SIZE = 8192; - char out[CHUNK_SIZE]; - strm.zalloc = Z_NULL; - strm.zfree = Z_NULL; - strm.opaque = Z_NULL; - strm.avail_in = data.size(); - strm.next_in = reinterpret_cast(data.data()); - - inflateInit2(&strm, 15 + 32); - do { - strm.avail_out = CHUNK_SIZE; - strm.next_out = reinterpret_cast(out); - inflate(&strm, Z_NO_FLUSH); - nbt.append(out, CHUNK_SIZE - strm.avail_out); - } while (strm.avail_out == 0); - inflateEnd(&strm); - - TagDataStream s(nbt.constData(), nbt.size()); - - if (s.r8() == 10) { // compound - s.skip(s.r16()); // skip name - root = new Tag_Compound(&s); - } + // level.dat is typically gzip format, but autodetect here + unpack_zlib(reinterpret_cast(data.data()), data.size(), 15 + 32); // +32 to autodetect gzip/zlib compression } -// this handles decoding a compressed() section of a region file +// this handles decoding a compressed Chunk of a region file NBT::NBT(const uchar *chunk) - : root(&NBT::Null) // just in case we die + : root(&NBT::Null) // just in case we die, init with empty data { - // find chunk size - int length = (chunk[0] << 24) | (chunk[1] << 16) | (chunk[2] << 8) | - chunk[3]; - if (chunk[4] != 2) // rfc1950 - return; - - z_stream strm; - static const int CHUNK_SIZE = 8192; + // find chunk size in first 4 bytes, format is fifth byte + int length = (chunk[0] << 24) | (chunk[1] << 16) | (chunk[2] << 8) | chunk[3]; + const unsigned char * data = reinterpret_cast(chunk) + 5; + // supported compression formats + if (chunk[4] == 1) unpack_zlib(data, length - 1, 15 + 16); // rfc1952 not used by official Minecraft + if (chunk[4] == 2) unpack_zlib(data, length - 1, 15 + 0); // rfc1950 default for all Chunk data +} + +Tag NBT::Null; + + +static const int CHUNK_SIZE = 8192; +// default window size: 15 bit +// + 0 zlib data (RFC 1950) +// +16 gzip data (RFC 1952) +// +32 autodetect zlib/gzip from header +void NBT::unpack_zlib(const unsigned char * data, unsigned long length, int windowsize) { + + z_stream stream; + stream.zalloc = Z_NULL; + stream.zfree = Z_NULL; + stream.opaque = Z_NULL; + stream.avail_in = length; + stream.next_in = const_cast(data); // yes we violate "const", but zlib will not change the input data + char out[CHUNK_SIZE]; - strm.zalloc = Z_NULL; - strm.zfree = Z_NULL; - strm.opaque = Z_NULL; - strm.avail_in = length - 1; - strm.next_in = (Bytef *)chunk + 5; QByteArray nbt; - inflateInit(&strm); + inflateInit2(&stream, windowsize); do { - strm.avail_out = CHUNK_SIZE; - strm.next_out = reinterpret_cast(out); - inflate(&strm, Z_NO_FLUSH); - nbt.append(out, CHUNK_SIZE - strm.avail_out); - } while (strm.avail_out == 0); - inflateEnd(&strm); + stream.avail_out = CHUNK_SIZE; + stream.next_out = reinterpret_cast(out); + inflate(&stream, Z_NO_FLUSH); + nbt.append(out, CHUNK_SIZE - stream.avail_out); + } while (stream.avail_out == 0); + inflateEnd(&stream); TagDataStream s(nbt.constData(), nbt.size()); - if (s.r8() == 10) { // compound - s.skip(s.r16()); // skip name + if (s.r8() == Tag::TAG_COMPOUND) { // outer compound is expected + s.skip(s.r16()); // skip name (should be empty anyways) root = new Tag_Compound(&s); } } -Tag NBT::Null; - bool NBT::has(const QString key) const { return root->has(key); } diff --git a/nbt/nbt.h b/nbt/nbt.h index e1ae0d53..35287b26 100644 --- a/nbt/nbt.h +++ b/nbt/nbt.h @@ -17,8 +17,11 @@ class NBT { const Tag * at(const QString key) const; static Tag Null; + private: - Tag *root; + void unpack_zlib(const unsigned char * data, unsigned long length, int windowsize = 15); + + Tag * root; }; #endif // NBT_H_ diff --git a/nbt/tag.cpp b/nbt/tag.cpp index 49f62461..f2269ba5 100644 --- a/nbt/tag.cpp +++ b/nbt/tag.cpp @@ -334,8 +334,7 @@ const QVariant Tag_List::getData() const { Tag_Compound::Tag_Compound(TagDataStream *s) { quint8 type; - while ((type = s->r8()) != 0) { - // until tag_end + while ((type = s->r8()) != TAG_END) { // parse until we reach TAG_END quint16 len = s->r16(); QString key = s->utf8(len); Tag *child; From 750e475c98824f2998001f1ebc810c3b100bcd87 Mon Sep 17 00:00:00 2001 From: EtlamGit Date: Sat, 27 Dec 2025 23:40:21 +0100 Subject: [PATCH 2/5] add LZ4 files v1.10.0 https://lz4.org/ https://github.com/lz4/lz4 --- lz4/LICENSE | 24 + lz4/lz4.c | 2832 ++++++++++++++++++++++++++++++++++++++++++++++++++ lz4/lz4.h | 886 ++++++++++++++++ lz4/xxhash.c | 1030 ++++++++++++++++++ lz4/xxhash.h | 328 ++++++ minutor.pro | 6 +- 6 files changed, 5105 insertions(+), 1 deletion(-) create mode 100644 lz4/LICENSE create mode 100644 lz4/lz4.c create mode 100644 lz4/lz4.h create mode 100644 lz4/xxhash.c create mode 100644 lz4/xxhash.h diff --git a/lz4/LICENSE b/lz4/LICENSE new file mode 100644 index 00000000..48849169 --- /dev/null +++ b/lz4/LICENSE @@ -0,0 +1,24 @@ +LZ4 Library +Copyright (c) 2011-2020, Yann Collet +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/lz4/lz4.c b/lz4/lz4.c new file mode 100644 index 00000000..669b1304 --- /dev/null +++ b/lz4/lz4.c @@ -0,0 +1,2832 @@ +/* + LZ4 - Fast LZ compression algorithm + Copyright (c) Yann Collet. All rights reserved. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repository : https://github.com/lz4/lz4 +*/ + +/*-************************************ +* Tuning parameters +**************************************/ +/* + * LZ4_HEAPMODE : + * Select how stateless compression functions like `LZ4_compress_default()` + * allocate memory for their hash table, + * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()). + */ +#ifndef LZ4_HEAPMODE +# define LZ4_HEAPMODE 0 +#endif + +/* + * LZ4_ACCELERATION_DEFAULT : + * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0 + */ +#define LZ4_ACCELERATION_DEFAULT 1 +/* + * LZ4_ACCELERATION_MAX : + * Any "acceleration" value higher than this threshold + * get treated as LZ4_ACCELERATION_MAX instead (fix #876) + */ +#define LZ4_ACCELERATION_MAX 65537 + + +/*-************************************ +* CPU Feature Detection +**************************************/ +/* LZ4_FORCE_MEMORY_ACCESS + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets which assembly generation depends on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ +# if defined(__GNUC__) && \ + ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \ + || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \ + || (defined(__riscv) && defined(__riscv_zicclsm)) ) +# define LZ4_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) || defined(_MSC_VER) +# define LZ4_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/* + * LZ4_FORCE_SW_BITCOUNT + * Define this parameter if your target system or compiler does not support hardware bit count + */ +#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */ +# undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */ +# define LZ4_FORCE_SW_BITCOUNT +#endif + + + +/*-************************************ +* Dependency +**************************************/ +/* + * LZ4_SRC_INCLUDED: + * Amalgamation flag, whether lz4.c is included + */ +#ifndef LZ4_SRC_INCLUDED +# define LZ4_SRC_INCLUDED 1 +#endif + +#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS +# define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */ +#endif + +#ifndef LZ4_STATIC_LINKING_ONLY +# define LZ4_STATIC_LINKING_ONLY +#endif +#include "lz4.h" +/* see also "memory routines" below */ + + +/*-************************************ +* Compiler Options +**************************************/ +#if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */ +# include /* only present in VS2005+ */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 6237) /* disable: C6237: conditional expression is always 0 */ +# pragma warning(disable : 6239) /* disable: C6239: ( && ) always evaluates to the result of */ +# pragma warning(disable : 6240) /* disable: C6240: ( && ) always evaluates to the result of */ +# pragma warning(disable : 6326) /* disable: C6326: Potential comparison of a constant with another constant */ +#endif /* _MSC_VER */ + +#ifndef LZ4_FORCE_INLINE +# if defined (_MSC_VER) && !defined (__clang__) /* MSVC */ +# define LZ4_FORCE_INLINE static __forceinline +# else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# if defined (__GNUC__) || defined (__clang__) +# define LZ4_FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define LZ4_FORCE_INLINE static inline +# endif +# else +# define LZ4_FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +# endif /* _MSC_VER */ +#endif /* LZ4_FORCE_INLINE */ + +/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE + * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8, + * together with a simple 8-byte copy loop as a fall-back path. + * However, this optimization hurts the decompression speed by >30%, + * because the execution does not go to the optimized loop + * for typical compressible data, and all of the preamble checks + * before going to the fall-back path become useless overhead. + * This optimization happens only with the -O3 flag, and -O2 generates + * a simple 8-byte copy loop. + * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8 + * functions are annotated with __attribute__((optimize("O2"))), + * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute + * of LZ4_wildCopy8 does not affect the compression speed. + */ +#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__) +# define LZ4_FORCE_O2 __attribute__((optimize("O2"))) +# undef LZ4_FORCE_INLINE +# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline)) +#else +# define LZ4_FORCE_O2 +#endif + +#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__) +# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#else +# define expect(expr,value) (expr) +#endif + +#ifndef likely +#define likely(expr) expect((expr) != 0, 1) +#endif +#ifndef unlikely +#define unlikely(expr) expect((expr) != 0, 0) +#endif + +/* Should the alignment test prove unreliable, for some reason, + * it can be disabled by setting LZ4_ALIGN_TEST to 0 */ +#ifndef LZ4_ALIGN_TEST /* can be externally provided */ +# define LZ4_ALIGN_TEST 1 +#endif + + +/*-************************************ +* Memory routines +**************************************/ + +/*! LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION : + * Disable relatively high-level LZ4/HC functions that use dynamic memory + * allocation functions (malloc(), calloc(), free()). + * + * Note that this is a compile-time switch. And since it disables + * public/stable LZ4 v1 API functions, we don't recommend using this + * symbol to generate a library for distribution. + * + * The following public functions are removed when this symbol is defined. + * - lz4 : LZ4_createStream, LZ4_freeStream, + * LZ4_createStreamDecode, LZ4_freeStreamDecode, LZ4_create (deprecated) + * - lz4hc : LZ4_createStreamHC, LZ4_freeStreamHC, + * LZ4_createHC (deprecated), LZ4_freeHC (deprecated) + * - lz4frame, lz4file : All LZ4F_* functions + */ +#if defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +# define ALLOC(s) lz4_error_memory_allocation_is_disabled +# define ALLOC_AND_ZERO(s) lz4_error_memory_allocation_is_disabled +# define FREEMEM(p) lz4_error_memory_allocation_is_disabled +#elif defined(LZ4_USER_MEMORY_FUNCTIONS) +/* memory management functions can be customized by user project. + * Below functions must exist somewhere in the Project + * and be available at link time */ +void* LZ4_malloc(size_t s); +void* LZ4_calloc(size_t n, size_t s); +void LZ4_free(void* p); +# define ALLOC(s) LZ4_malloc(s) +# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s) +# define FREEMEM(p) LZ4_free(p) +#else +# include /* malloc, calloc, free */ +# define ALLOC(s) malloc(s) +# define ALLOC_AND_ZERO(s) calloc(1,s) +# define FREEMEM(p) free(p) +#endif + +#if ! LZ4_FREESTANDING +# include /* memset, memcpy */ +#endif +#if !defined(LZ4_memset) +# define LZ4_memset(p,v,s) memset((p),(v),(s)) +#endif +#define MEM_INIT(p,v,s) LZ4_memset((p),(v),(s)) + + +/*-************************************ +* Common Constants +**************************************/ +#define MINMATCH 4 + +#define WILDCOPYLENGTH 8 +#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */ +#define FASTLOOP_SAFE_DISTANCE 64 +static const int LZ4_minLength = (MFLIMIT+1); + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define LZ4_DISTANCE_ABSOLUTE_MAX 65535 +#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */ +# error "LZ4_DISTANCE_MAX is too big : must be <= 65535" +#endif + +#define ML_BITS 4 +#define ML_MASK ((1U<=1) +# include +#else +# ifndef assert +# define assert(condition) ((void)0) +# endif +#endif + +#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */ + +#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) +# include + static int g_debuglog_enable = 1; +# define DEBUGLOG(l, ...) { \ + if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \ + fprintf(stderr, __FILE__ " %i: ", __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, " \n"); \ + } } +#else +# define DEBUGLOG(l, ...) {} /* disabled */ +#endif + +static int LZ4_isAligned(const void* ptr, size_t alignment) +{ + return ((size_t)ptr & (alignment -1)) == 0; +} + + +/*-************************************ +* Types +**************************************/ +#include +#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# include + typedef unsigned char BYTE; /*uint8_t not necessarily blessed to alias arbitrary type*/ + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef uintptr_t uptrval; +#else +# if UINT_MAX != 4294967295UL +# error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4" +# endif + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef size_t uptrval; /* generally true, except OpenVMS-64 */ +#endif + +#if defined(__x86_64__) + typedef U64 reg_t; /* 64-bits in x32 mode */ +#else + typedef size_t reg_t; /* 32-bits in x32 mode */ +#endif + +typedef enum { + notLimited = 0, + limitedOutput = 1, + fillOutput = 2 +} limitedOutput_directive; + + +/*-************************************ +* Reading and writing into memory +**************************************/ + +/** + * LZ4 relies on memcpy with a constant size being inlined. In freestanding + * environments, the compiler can't assume the implementation of memcpy() is + * standard compliant, so it can't apply its specialized memcpy() inlining + * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze + * memcpy() as if it were standard compliant, so it can inline it in freestanding + * environments. This is needed when decompressing the Linux Kernel, for example. + */ +#if !defined(LZ4_memcpy) +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) +# else +# define LZ4_memcpy(dst, src, size) memcpy(dst, src, size) +# endif +#endif + +#if !defined(LZ4_memmove) +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define LZ4_memmove __builtin_memmove +# else +# define LZ4_memmove memmove +# endif +#endif + +static unsigned LZ4_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#define LZ4_PACK( __Declaration__ ) __Declaration__ __attribute__((__packed__)) +#elif defined(_MSC_VER) +#define LZ4_PACK( __Declaration__ ) __pragma( pack(push, 1) ) __Declaration__ __pragma( pack(pop)) +#endif + +#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2) +/* lie to the compiler about data alignment; use with caution */ + +static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; } +static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; } +static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; } + +static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } + +#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +LZ4_PACK(typedef struct { U16 u16; }) LZ4_unalign16; +LZ4_PACK(typedef struct { U32 u32; }) LZ4_unalign32; +LZ4_PACK(typedef struct { reg_t uArch; }) LZ4_unalignST; + +static U16 LZ4_read16(const void* ptr) { return ((const LZ4_unalign16*)ptr)->u16; } +static U32 LZ4_read32(const void* ptr) { return ((const LZ4_unalign32*)ptr)->u32; } +static reg_t LZ4_read_ARCH(const void* ptr) { return ((const LZ4_unalignST*)ptr)->uArch; } + +static void LZ4_write16(void* memPtr, U16 value) { ((LZ4_unalign16*)memPtr)->u16 = value; } +static void LZ4_write32(void* memPtr, U32 value) { ((LZ4_unalign32*)memPtr)->u32 = value; } + +#else /* safe and portable access using memcpy() */ + +static U16 LZ4_read16(const void* memPtr) +{ + U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +static U32 LZ4_read32(const void* memPtr) +{ + U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +static reg_t LZ4_read_ARCH(const void* memPtr) +{ + reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +static void LZ4_write16(void* memPtr, U16 value) +{ + LZ4_memcpy(memPtr, &value, sizeof(value)); +} + +static void LZ4_write32(void* memPtr, U32 value) +{ + LZ4_memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* LZ4_FORCE_MEMORY_ACCESS */ + + +static U16 LZ4_readLE16(const void* memPtr) +{ + if (LZ4_isLittleEndian()) { + return LZ4_read16(memPtr); + } else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)((U16)p[0] | (p[1]<<8)); + } +} + +#ifdef LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT +static U32 LZ4_readLE32(const void* memPtr) +{ + if (LZ4_isLittleEndian()) { + return LZ4_read32(memPtr); + } else { + const BYTE* p = (const BYTE*)memPtr; + return (U32)p[0] | (p[1]<<8) | (p[2]<<16) | (p[3]<<24); + } +} +#endif + +static void LZ4_writeLE16(void* memPtr, U16 value) +{ + if (LZ4_isLittleEndian()) { + LZ4_write16(memPtr, value); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE) value; + p[1] = (BYTE)(value>>8); + } +} + +/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */ +LZ4_FORCE_INLINE +void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* const e = (BYTE*)dstEnd; + + do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d= 16. */ +LZ4_FORCE_INLINE void +LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* const e = (BYTE*)dstEnd; + + do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d= dstPtr + MINMATCH + * - there is at least 12 bytes available to write after dstEnd */ +LZ4_FORCE_INLINE void +LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) +{ + BYTE v[8]; + + assert(dstEnd >= dstPtr + MINMATCH); + + switch(offset) { + case 1: + MEM_INIT(v, *srcPtr, 8); + break; + case 2: + LZ4_memcpy(v, srcPtr, 2); + LZ4_memcpy(&v[2], srcPtr, 2); +#if defined(_MSC_VER) && (_MSC_VER <= 1937) /* MSVC 2022 ver 17.7 or earlier */ +# pragma warning(push) +# pragma warning(disable : 6385) /* warning C6385: Reading invalid data from 'v'. */ +#endif + LZ4_memcpy(&v[4], v, 4); +#if defined(_MSC_VER) && (_MSC_VER <= 1937) /* MSVC 2022 ver 17.7 or earlier */ +# pragma warning(pop) +#endif + break; + case 4: + LZ4_memcpy(v, srcPtr, 4); + LZ4_memcpy(&v[4], srcPtr, 4); + break; + default: + LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset); + return; + } + + LZ4_memcpy(dstPtr, v, 8); + dstPtr += 8; + while (dstPtr < dstEnd) { + LZ4_memcpy(dstPtr, v, 8); + dstPtr += 8; + } +} +#endif + + +/*-************************************ +* Common functions +**************************************/ +static unsigned LZ4_NbCommonBytes (reg_t val) +{ + assert(val != 0); + if (LZ4_isLittleEndian()) { + if (sizeof(val) == 8) { +# if defined(_MSC_VER) && (_MSC_VER >= 1800) && (defined(_M_AMD64) && !defined(_M_ARM64EC)) && !defined(LZ4_FORCE_SW_BITCOUNT) +/*-************************************************************************************************* +* ARM64EC is a Microsoft-designed ARM64 ABI compatible with AMD64 applications on ARM64 Windows 11. +* The ARM64EC ABI does not support AVX/AVX2/AVX512 instructions, nor their relevant intrinsics +* including _tzcnt_u64. Therefore, we need to neuter the _tzcnt_u64 code path for ARM64EC. +****************************************************************************************************/ +# if defined(__clang__) && (__clang_major__ < 10) + /* Avoid undefined clang-cl intrinsics issue. + * See https://github.com/lz4/lz4/pull/1017 for details. */ + return (unsigned)__builtin_ia32_tzcnt_u64(val) >> 3; +# else + /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */ + return (unsigned)_tzcnt_u64(val) >> 3; +# endif +# elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanForward64(&r, (U64)val); + return (unsigned)r >> 3; +# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_ctzll((U64)val) >> 3; +# else + const U64 m = 0x0101010101010101ULL; + val ^= val - 1; + return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56); +# endif + } else /* 32 bits */ { +# if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanForward(&r, (U32)val); + return (unsigned)r >> 3; +# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_ctz((U32)val) >> 3; +# else + const U32 m = 0x01010101; + return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24; +# endif + } + } else /* Big Endian CPU */ { + if (sizeof(val)==8) { +# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_clzll((U64)val) >> 3; +# else +#if 1 + /* this method is probably faster, + * but adds a 128 bytes lookup table */ + static const unsigned char ctz7_tab[128] = { + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + }; + U64 const mask = 0x0101010101010101ULL; + U64 const t = (((val >> 8) - mask) | val) & mask; + return ctz7_tab[(t * 0x0080402010080402ULL) >> 57]; +#else + /* this method doesn't consume memory space like the previous one, + * but it contains several branches, + * that may end up slowing execution */ + static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits. + Just to avoid some static analyzer complaining about shift by 32 on 32-bits target. + Note that this code path is never triggered in 32-bits mode. */ + unsigned r; + if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +#endif +# endif + } else /* 32 bits */ { +# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_clz((U32)val) >> 3; +# else + val >>= 8; + val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) | + (val + 0x00FF0000)) >> 24; + return (unsigned)val ^ 3; +# endif + } + } +} + + +#define STEPSIZE sizeof(reg_t) +LZ4_FORCE_INLINE +unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) +{ + const BYTE* const pStart = pIn; + + if (likely(pIn < pInLimit-(STEPSIZE-1))) { + reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + if (!diff) { + pIn+=STEPSIZE; pMatch+=STEPSIZE; + } else { + return LZ4_NbCommonBytes(diff); + } } + + while (likely(pIn < pInLimit-(STEPSIZE-1))) { + reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; } + pIn += LZ4_NbCommonBytes(diff); + return (unsigned)(pIn - pStart); + } + + if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn compression run slower on incompressible data */ + + +/*-************************************ +* Local Structures and types +**************************************/ +typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t; + +/** + * This enum distinguishes several different modes of accessing previous + * content in the stream. + * + * - noDict : There is no preceding content. + * - withPrefix64k : Table entries up to ctx->dictSize before the current blob + * blob being compressed are valid and refer to the preceding + * content (of length ctx->dictSize), which is available + * contiguously preceding in memory the content currently + * being compressed. + * - usingExtDict : Like withPrefix64k, but the preceding content is somewhere + * else in memory, starting at ctx->dictionary with length + * ctx->dictSize. + * - usingDictCtx : Everything concerning the preceding content is + * in a separate context, pointed to by ctx->dictCtx. + * ctx->dictionary, ctx->dictSize, and table entries + * in the current context that refer to positions + * preceding the beginning of the current compression are + * ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx + * ->dictSize describe the location and size of the preceding + * content, and matches are found by looking in the ctx + * ->dictCtx->hashTable. + */ +typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive; +typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; + + +/*-************************************ +* Local Utils +**************************************/ +int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; } +const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; } +int LZ4_compressBound(int isize) { return LZ4__COMPRESSBOUND(isize); } +int LZ4_sizeofState(void) { return sizeof(LZ4_stream_t); } + + +/*-**************************************** +* Internal Definitions, used only in Tests +*******************************************/ +#if defined (__cplusplus) +extern "C" { +#endif + +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize); + +int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, + int compressedSize, int maxOutputSize, + const void* dictStart, size_t dictSize); +int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest, + int compressedSize, int targetOutputSize, int dstCapacity, + const void* dictStart, size_t dictSize); +#if defined (__cplusplus) +} +#endif + +/*-****************************** +* Compression functions +********************************/ +LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType) +{ + if (tableType == byU16) + return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); + else + return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); +} + +LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType) +{ + const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG; + if (LZ4_isLittleEndian()) { + const U64 prime5bytes = 889523592379ULL; + return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog)); + } else { + const U64 prime8bytes = 11400714785074694791ULL; + return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog)); + } +} + +LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType) +{ + if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType); + +#ifdef LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT + return LZ4_hash4(LZ4_readLE32(p), tableType); +#else + return LZ4_hash4(LZ4_read32(p), tableType); +#endif +} + +LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType) +{ + switch (tableType) + { + default: /* fallthrough */ + case clearedTable: { /* illegal! */ assert(0); return; } + case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; } + } +} + +LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType) +{ + switch (tableType) + { + default: /* fallthrough */ + case clearedTable: /* fallthrough */ + case byPtr: { /* illegal! */ assert(0); return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; } + case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; } + } +} + +/* LZ4_putPosition*() : only used in byPtr mode */ +LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, + void* tableBase, tableType_t const tableType) +{ + const BYTE** const hashTable = (const BYTE**)tableBase; + assert(tableType == byPtr); (void)tableType; + hashTable[h] = p; +} + +LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType) +{ + U32 const h = LZ4_hashPosition(p, tableType); + LZ4_putPositionOnHash(p, h, tableBase, tableType); +} + +/* LZ4_getIndexOnHash() : + * Index of match position registered in hash table. + * hash position must be calculated by using base+index, or dictBase+index. + * Assumption 1 : only valid if tableType == byU32 or byU16. + * Assumption 2 : h is presumed valid (within limits of hash table) + */ +LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType) +{ + LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2); + if (tableType == byU32) { + const U32* const hashTable = (const U32*) tableBase; + assert(h < (1U << (LZ4_MEMORY_USAGE-2))); + return hashTable[h]; + } + if (tableType == byU16) { + const U16* const hashTable = (const U16*) tableBase; + assert(h < (1U << (LZ4_MEMORY_USAGE-1))); + return hashTable[h]; + } + assert(0); return 0; /* forbidden case */ +} + +static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType) +{ + assert(tableType == byPtr); (void)tableType; + { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; } +} + +LZ4_FORCE_INLINE const BYTE* +LZ4_getPosition(const BYTE* p, + const void* tableBase, tableType_t tableType) +{ + U32 const h = LZ4_hashPosition(p, tableType); + return LZ4_getPositionOnHash(h, tableBase, tableType); +} + +LZ4_FORCE_INLINE void +LZ4_prepareTable(LZ4_stream_t_internal* const cctx, + const int inputSize, + const tableType_t tableType) { + /* If the table hasn't been used, it's guaranteed to be zeroed out, and is + * therefore safe to use no matter what mode we're in. Otherwise, we figure + * out if it's safe to leave as is or whether it needs to be reset. + */ + if ((tableType_t)cctx->tableType != clearedTable) { + assert(inputSize >= 0); + if ((tableType_t)cctx->tableType != tableType + || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU) + || ((tableType == byU32) && cctx->currentOffset > 1 GB) + || tableType == byPtr + || inputSize >= 4 KB) + { + DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", (void*)cctx); + MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE); + cctx->currentOffset = 0; + cctx->tableType = (U32)clearedTable; + } else { + DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)"); + } + } + + /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back, + * is faster than compressing without a gap. + * However, compressing with currentOffset == 0 is faster still, + * so we preserve that case. + */ + if (cctx->currentOffset != 0 && tableType == byU32) { + DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset"); + cctx->currentOffset += 64 KB; + } + + /* Finally, clear history */ + cctx->dictCtx = NULL; + cctx->dictionary = NULL; + cctx->dictSize = 0; +} + +/** LZ4_compress_generic_validated() : + * inlined, to ensure branches are decided at compilation time. + * The following conditions are presumed already validated: + * - source != NULL + * - inputSize > 0 + */ +LZ4_FORCE_INLINE int LZ4_compress_generic_validated( + LZ4_stream_t_internal* const cctx, + const char* const source, + char* const dest, + const int inputSize, + int* inputConsumed, /* only written when outputDirective == fillOutput */ + const int maxOutputSize, + const limitedOutput_directive outputDirective, + const tableType_t tableType, + const dict_directive dictDirective, + const dictIssue_directive dictIssue, + const int acceleration) +{ + int result; + const BYTE* ip = (const BYTE*)source; + + U32 const startIndex = cctx->currentOffset; + const BYTE* base = (const BYTE*)source - startIndex; + const BYTE* lowLimit; + + const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx; + const BYTE* const dictionary = + dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary; + const U32 dictSize = + dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize; + const U32 dictDelta = + (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0; /* make indexes in dictCtx comparable with indexes in current context */ + + int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx); + U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictDirective == dictSmall */ + const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary; + const BYTE* anchor = (const BYTE*) source; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1; + const BYTE* const matchlimit = iend - LASTLITERALS; + + /* the dictCtx currentOffset is indexed on the start of the dictionary, + * while a dictionary in the current context precedes the currentOffset */ + const BYTE* dictBase = (dictionary == NULL) ? NULL : + (dictDirective == usingDictCtx) ? + dictionary + dictSize - dictCtx->currentOffset : + dictionary + dictSize - startIndex; + + BYTE* op = (BYTE*) dest; + BYTE* const olimit = op + maxOutputSize; + + U32 offset = 0; + U32 forwardH; + + DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType); + assert(ip != NULL); + if (tableType == byU16) assert(inputSize= 1); + + lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0); + + /* Update context state */ + if (dictDirective == usingDictCtx) { + /* Subsequent linked blocks can't use the dictionary. */ + /* Instead, they use the block we just compressed. */ + cctx->dictCtx = NULL; + cctx->dictSize = (U32)inputSize; + } else { + cctx->dictSize += (U32)inputSize; + } + cctx->currentOffset += (U32)inputSize; + cctx->tableType = (U32)tableType; + + if (inputSizehashTable, byPtr); + } else { + LZ4_putIndexOnHash(startIndex, h, cctx->hashTable, tableType); + } } + ip++; forwardH = LZ4_hashPosition(ip, tableType); + + /* Main Loop */ + for ( ; ; ) { + const BYTE* match; + BYTE* token; + const BYTE* filledIp; + + /* Find a match */ + if (tableType == byPtr) { + const BYTE* forwardIp = ip; + int step = 1; + int searchMatchNb = acceleration << LZ4_skipTrigger; + do { + U32 const h = forwardH; + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); + + if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; + assert(ip < mflimitPlusOne); + + match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType); + forwardH = LZ4_hashPosition(forwardIp, tableType); + LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType); + + } while ( (match+LZ4_DISTANCE_MAX < ip) + || (LZ4_read32(match) != LZ4_read32(ip)) ); + + } else { /* byU32, byU16 */ + + const BYTE* forwardIp = ip; + int step = 1; + int searchMatchNb = acceleration << LZ4_skipTrigger; + do { + U32 const h = forwardH; + U32 const current = (U32)(forwardIp - base); + U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); + assert(matchIndex <= current); + assert(forwardIp - base < (ptrdiff_t)(2 GB - 1)); + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); + + if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; + assert(ip < mflimitPlusOne); + + if (dictDirective == usingDictCtx) { + if (matchIndex < startIndex) { + /* there was no match, try the dictionary */ + assert(tableType == byU32); + matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); + match = dictBase + matchIndex; + matchIndex += dictDelta; /* make dictCtx index comparable with current context */ + lowLimit = dictionary; + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; + } + } else if (dictDirective == usingExtDict) { + if (matchIndex < startIndex) { + DEBUGLOG(7, "extDict candidate: matchIndex=%5u < startIndex=%5u", matchIndex, startIndex); + assert(startIndex - matchIndex >= MINMATCH); + assert(dictBase); + match = dictBase + matchIndex; + lowLimit = dictionary; + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; + } + } else { /* single continuous memory segment */ + match = base + matchIndex; + } + forwardH = LZ4_hashPosition(forwardIp, tableType); + LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); + + DEBUGLOG(7, "candidate at pos=%u (offset=%u \n", matchIndex, current - matchIndex); + if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; } /* match outside of valid area */ + assert(matchIndex < current); + if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX)) + && (matchIndex+LZ4_DISTANCE_MAX < current)) { + continue; + } /* too far */ + assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* match now expected within distance */ + + if (LZ4_read32(match) == LZ4_read32(ip)) { + if (maybe_extMem) offset = current - matchIndex; + break; /* match found */ + } + + } while(1); + } + + /* Catch up */ + filledIp = ip; + assert(ip > anchor); /* this is always true as ip has been advanced before entering the main loop */ + if ((match > lowLimit) && unlikely(ip[-1] == match[-1])) { + do { ip--; match--; } while (((ip > anchor) & (match > lowLimit)) && (unlikely(ip[-1] == match[-1]))); + } + + /* Encode Literals */ + { unsigned const litLength = (unsigned)(ip - anchor); + token = op++; + if ((outputDirective == limitedOutput) && /* Check output buffer overflow */ + (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) { + return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ + } + if ((outputDirective == fillOutput) && + (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) { + op--; + goto _last_literals; + } + if (litLength >= RUN_MASK) { + unsigned len = litLength - RUN_MASK; + *token = (RUN_MASK<= 255 ; len-=255) *op++ = 255; + *op++ = (BYTE)len; + } + else *token = (BYTE)(litLength< olimit)) { + /* the match was too close to the end, rewind and go to last literals */ + op = token; + goto _last_literals; + } + + /* Encode Offset */ + if (maybe_extMem) { /* static test */ + DEBUGLOG(6, " with offset=%u (ext if > %i)", offset, (int)(ip - (const BYTE*)source)); + assert(offset <= LZ4_DISTANCE_MAX && offset > 0); + LZ4_writeLE16(op, (U16)offset); op+=2; + } else { + DEBUGLOG(6, " with offset=%u (same segment)", (U32)(ip - match)); + assert(ip-match <= LZ4_DISTANCE_MAX); + LZ4_writeLE16(op, (U16)(ip - match)); op+=2; + } + + /* Encode MatchLength */ + { unsigned matchCode; + + if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx) + && (lowLimit==dictionary) /* match within extDict */ ) { + const BYTE* limit = ip + (dictEnd-match); + assert(dictEnd > match); + if (limit > matchlimit) limit = matchlimit; + matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit); + ip += (size_t)matchCode + MINMATCH; + if (ip==limit) { + unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit); + matchCode += more; + ip += more; + } + DEBUGLOG(6, " with matchLength=%u starting in extDict", matchCode+MINMATCH); + } else { + matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit); + ip += (size_t)matchCode + MINMATCH; + DEBUGLOG(6, " with matchLength=%u", matchCode+MINMATCH); + } + + if ((outputDirective) && /* Check output buffer overflow */ + (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) { + if (outputDirective == fillOutput) { + /* Match description too long : reduce it */ + U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255; + ip -= matchCode - newMatchCode; + assert(newMatchCode < matchCode); + matchCode = newMatchCode; + if (unlikely(ip <= filledIp)) { + /* We have already filled up to filledIp so if ip ends up less than filledIp + * we have positions in the hash table beyond the current position. This is + * a problem if we reuse the hash table. So we have to remove these positions + * from the hash table. + */ + const BYTE* ptr; + DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip)); + for (ptr = ip; ptr <= filledIp; ++ptr) { + U32 const h = LZ4_hashPosition(ptr, tableType); + LZ4_clearHash(h, cctx->hashTable, tableType); + } + } + } else { + assert(outputDirective == limitedOutput); + return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ + } + } + if (matchCode >= ML_MASK) { + *token += ML_MASK; + matchCode -= ML_MASK; + LZ4_write32(op, 0xFFFFFFFF); + while (matchCode >= 4*255) { + op+=4; + LZ4_write32(op, 0xFFFFFFFF); + matchCode -= 4*255; + } + op += matchCode / 255; + *op++ = (BYTE)(matchCode % 255); + } else + *token += (BYTE)(matchCode); + } + /* Ensure we have enough space for the last literals. */ + assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit)); + + anchor = ip; + + /* Test end of chunk */ + if (ip >= mflimitPlusOne) break; + + /* Fill table */ + { U32 const h = LZ4_hashPosition(ip-2, tableType); + if (tableType == byPtr) { + LZ4_putPositionOnHash(ip-2, h, cctx->hashTable, byPtr); + } else { + U32 const idx = (U32)((ip-2) - base); + LZ4_putIndexOnHash(idx, h, cctx->hashTable, tableType); + } } + + /* Test next position */ + if (tableType == byPtr) { + + match = LZ4_getPosition(ip, cctx->hashTable, tableType); + LZ4_putPosition(ip, cctx->hashTable, tableType); + if ( (match+LZ4_DISTANCE_MAX >= ip) + && (LZ4_read32(match) == LZ4_read32(ip)) ) + { token=op++; *token=0; goto _next_match; } + + } else { /* byU32, byU16 */ + + U32 const h = LZ4_hashPosition(ip, tableType); + U32 const current = (U32)(ip-base); + U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); + assert(matchIndex < current); + if (dictDirective == usingDictCtx) { + if (matchIndex < startIndex) { + /* there was no match, try the dictionary */ + assert(tableType == byU32); + matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); + match = dictBase + matchIndex; + lowLimit = dictionary; /* required for match length counter */ + matchIndex += dictDelta; + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; /* required for match length counter */ + } + } else if (dictDirective==usingExtDict) { + if (matchIndex < startIndex) { + assert(dictBase); + match = dictBase + matchIndex; + lowLimit = dictionary; /* required for match length counter */ + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; /* required for match length counter */ + } + } else { /* single memory segment */ + match = base + matchIndex; + } + LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); + assert(matchIndex < current); + if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1) + && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current)) + && (LZ4_read32(match) == LZ4_read32(ip)) ) { + token=op++; + *token=0; + if (maybe_extMem) offset = current - matchIndex; + DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i", + (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source)); + goto _next_match; + } + } + + /* Prepare next loop */ + forwardH = LZ4_hashPosition(++ip, tableType); + + } + +_last_literals: + /* Encode Last Literals */ + { size_t lastRun = (size_t)(iend - anchor); + if ( (outputDirective) && /* Check output buffer overflow */ + (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) { + if (outputDirective == fillOutput) { + /* adapt lastRun to fill 'dst' */ + assert(olimit >= op); + lastRun = (size_t)(olimit-op) - 1/*token*/; + lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/ + } else { + assert(outputDirective == limitedOutput); + return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ + } + } + DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun); + if (lastRun >= RUN_MASK) { + size_t accumulator = lastRun - RUN_MASK; + *op++ = RUN_MASK << ML_BITS; + for(; accumulator >= 255 ; accumulator-=255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRun< 0); + DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result); + return result; +} + +/** LZ4_compress_generic() : + * inlined, to ensure branches are decided at compilation time; + * takes care of src == (NULL, 0) + * and forward the rest to LZ4_compress_generic_validated */ +LZ4_FORCE_INLINE int LZ4_compress_generic( + LZ4_stream_t_internal* const cctx, + const char* const src, + char* const dst, + const int srcSize, + int *inputConsumed, /* only written when outputDirective == fillOutput */ + const int dstCapacity, + const limitedOutput_directive outputDirective, + const tableType_t tableType, + const dict_directive dictDirective, + const dictIssue_directive dictIssue, + const int acceleration) +{ + DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i", + srcSize, dstCapacity); + + if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported srcSize, too large (or negative) */ + if (srcSize == 0) { /* src == NULL supported if srcSize == 0 */ + if (outputDirective != notLimited && dstCapacity <= 0) return 0; /* no output, can't write anything */ + DEBUGLOG(5, "Generating an empty block"); + assert(outputDirective == notLimited || dstCapacity >= 1); + assert(dst != NULL); + dst[0] = 0; + if (outputDirective == fillOutput) { + assert (inputConsumed != NULL); + *inputConsumed = 0; + } + return 1; + } + assert(src != NULL); + + return LZ4_compress_generic_validated(cctx, src, dst, srcSize, + inputConsumed, /* only written into if outputDirective == fillOutput */ + dstCapacity, outputDirective, + tableType, dictDirective, dictIssue, acceleration); +} + + +int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) +{ + LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse; + assert(ctx != NULL); + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + if (maxOutputSize >= LZ4_compressBound(inputSize)) { + if (inputSize < LZ4_64Klimit) { + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration); + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + if (inputSize < LZ4_64Klimit) { + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } +} + +/** + * LZ4_compress_fast_extState_fastReset() : + * A variant of LZ4_compress_fast_extState(). + * + * Using this variant avoids an expensive initialization step. It is only safe + * to call if the state buffer is known to be correctly initialized already + * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of + * "correctly initialized"). + */ +int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration) +{ + LZ4_stream_t_internal* const ctx = &((LZ4_stream_t*)state)->internal_donotuse; + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + assert(ctx != NULL); + + if (dstCapacity >= LZ4_compressBound(srcSize)) { + if (srcSize < LZ4_64Klimit) { + const tableType_t tableType = byU16; + LZ4_prepareTable(ctx, srcSize, tableType); + if (ctx->currentOffset) { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration); + } else { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + LZ4_prepareTable(ctx, srcSize, tableType); + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + if (srcSize < LZ4_64Klimit) { + const tableType_t tableType = byU16; + LZ4_prepareTable(ctx, srcSize, tableType); + if (ctx->currentOffset) { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration); + } else { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + LZ4_prepareTable(ctx, srcSize, tableType); + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } +} + + +int LZ4_compress_fast(const char* src, char* dest, int srcSize, int dstCapacity, int acceleration) +{ + int result; +#if (LZ4_HEAPMODE) + LZ4_stream_t* const ctxPtr = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ + if (ctxPtr == NULL) return 0; +#else + LZ4_stream_t ctx; + LZ4_stream_t* const ctxPtr = &ctx; +#endif + result = LZ4_compress_fast_extState(ctxPtr, src, dest, srcSize, dstCapacity, acceleration); + +#if (LZ4_HEAPMODE) + FREEMEM(ctxPtr); +#endif + return result; +} + + +int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity) +{ + return LZ4_compress_fast(src, dst, srcSize, dstCapacity, 1); +} + + +/* Note!: This function leaves the stream in an unclean/broken state! + * It is not safe to subsequently use the same state with a _fastReset() or + * _continue() call without resetting it. */ +static int LZ4_compress_destSize_extState_internal(LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration) +{ + void* const s = LZ4_initStream(state, sizeof (*state)); + assert(s != NULL); (void)s; + + if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) { /* compression success is guaranteed */ + return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, acceleration); + } else { + if (*srcSizePtr < LZ4_64Klimit) { + return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, acceleration); + } else { + tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, acceleration); + } } +} + +int LZ4_compress_destSize_extState(void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration) +{ + int const r = LZ4_compress_destSize_extState_internal((LZ4_stream_t*)state, src, dst, srcSizePtr, targetDstSize, acceleration); + /* clean the state on exit */ + LZ4_initStream(state, sizeof (LZ4_stream_t)); + return r; +} + + +int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize) +{ +#if (LZ4_HEAPMODE) + LZ4_stream_t* const ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ + if (ctx == NULL) return 0; +#else + LZ4_stream_t ctxBody; + LZ4_stream_t* const ctx = &ctxBody; +#endif + + int result = LZ4_compress_destSize_extState_internal(ctx, src, dst, srcSizePtr, targetDstSize, 1); + +#if (LZ4_HEAPMODE) + FREEMEM(ctx); +#endif + return result; +} + + + +/*-****************************** +* Streaming functions +********************************/ + +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +LZ4_stream_t* LZ4_createStream(void) +{ + LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); + LZ4_STATIC_ASSERT(sizeof(LZ4_stream_t) >= sizeof(LZ4_stream_t_internal)); + DEBUGLOG(4, "LZ4_createStream %p", (void*)lz4s); + if (lz4s == NULL) return NULL; + LZ4_initStream(lz4s, sizeof(*lz4s)); + return lz4s; +} +#endif + +static size_t LZ4_stream_t_alignment(void) +{ +#if LZ4_ALIGN_TEST + typedef struct { char c; LZ4_stream_t t; } t_a; + return sizeof(t_a) - sizeof(LZ4_stream_t); +#else + return 1; /* effectively disabled */ +#endif +} + +LZ4_stream_t* LZ4_initStream (void* buffer, size_t size) +{ + DEBUGLOG(5, "LZ4_initStream"); + if (buffer == NULL) { return NULL; } + if (size < sizeof(LZ4_stream_t)) { return NULL; } + if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL; + MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal)); + return (LZ4_stream_t*)buffer; +} + +/* resetStream is now deprecated, + * prefer initStream() which is more general */ +void LZ4_resetStream (LZ4_stream_t* LZ4_stream) +{ + DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", (void*)LZ4_stream); + MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal)); +} + +void LZ4_resetStream_fast(LZ4_stream_t* ctx) { + LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32); +} + +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +int LZ4_freeStream (LZ4_stream_t* LZ4_stream) +{ + if (!LZ4_stream) return 0; /* support free on NULL */ + DEBUGLOG(5, "LZ4_freeStream %p", (void*)LZ4_stream); + FREEMEM(LZ4_stream); + return (0); +} +#endif + + +typedef enum { _ld_fast, _ld_slow } LoadDict_mode_e; +#define HASH_UNIT sizeof(reg_t) +int LZ4_loadDict_internal(LZ4_stream_t* LZ4_dict, + const char* dictionary, int dictSize, + LoadDict_mode_e _ld) +{ + LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse; + const tableType_t tableType = byU32; + const BYTE* p = (const BYTE*)dictionary; + const BYTE* const dictEnd = p + dictSize; + U32 idx32; + + DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, (void*)dictionary, (void*)LZ4_dict); + + /* It's necessary to reset the context, + * and not just continue it with prepareTable() + * to avoid any risk of generating overflowing matchIndex + * when compressing using this dictionary */ + LZ4_resetStream(LZ4_dict); + + /* We always increment the offset by 64 KB, since, if the dict is longer, + * we truncate it to the last 64k, and if it's shorter, we still want to + * advance by a whole window length so we can provide the guarantee that + * there are only valid offsets in the window, which allows an optimization + * in LZ4_compress_fast_continue() where it uses noDictIssue even when the + * dictionary isn't a full 64k. */ + dict->currentOffset += 64 KB; + + if (dictSize < (int)HASH_UNIT) { + return 0; + } + + if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB; + dict->dictionary = p; + dict->dictSize = (U32)(dictEnd - p); + dict->tableType = (U32)tableType; + idx32 = dict->currentOffset - dict->dictSize; + + while (p <= dictEnd-HASH_UNIT) { + U32 const h = LZ4_hashPosition(p, tableType); + /* Note: overwriting => favors positions end of dictionary */ + LZ4_putIndexOnHash(idx32, h, dict->hashTable, tableType); + p+=3; idx32+=3; + } + + if (_ld == _ld_slow) { + /* Fill hash table with additional references, to improve compression capability */ + p = dict->dictionary; + idx32 = dict->currentOffset - dict->dictSize; + while (p <= dictEnd-HASH_UNIT) { + U32 const h = LZ4_hashPosition(p, tableType); + U32 const limit = dict->currentOffset - 64 KB; + if (LZ4_getIndexOnHash(h, dict->hashTable, tableType) <= limit) { + /* Note: not overwriting => favors positions beginning of dictionary */ + LZ4_putIndexOnHash(idx32, h, dict->hashTable, tableType); + } + p++; idx32++; + } + } + + return (int)dict->dictSize; +} + +int LZ4_loadDict(LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) +{ + return LZ4_loadDict_internal(LZ4_dict, dictionary, dictSize, _ld_fast); +} + +int LZ4_loadDictSlow(LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) +{ + return LZ4_loadDict_internal(LZ4_dict, dictionary, dictSize, _ld_slow); +} + +void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream) +{ + const LZ4_stream_t_internal* dictCtx = (dictionaryStream == NULL) ? NULL : + &(dictionaryStream->internal_donotuse); + + DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)", + (void*)workingStream, (void*)dictionaryStream, + dictCtx != NULL ? dictCtx->dictSize : 0); + + if (dictCtx != NULL) { + /* If the current offset is zero, we will never look in the + * external dictionary context, since there is no value a table + * entry can take that indicate a miss. In that case, we need + * to bump the offset to something non-zero. + */ + if (workingStream->internal_donotuse.currentOffset == 0) { + workingStream->internal_donotuse.currentOffset = 64 KB; + } + + /* Don't actually attach an empty dictionary. + */ + if (dictCtx->dictSize == 0) { + dictCtx = NULL; + } + } + workingStream->internal_donotuse.dictCtx = dictCtx; +} + + +static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize) +{ + assert(nextSize >= 0); + if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) { /* potential ptrdiff_t overflow (32-bits mode) */ + /* rescale hash table */ + U32 const delta = LZ4_dict->currentOffset - 64 KB; + const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; + int i; + DEBUGLOG(4, "LZ4_renormDictT"); + for (i=0; ihashTable[i] < delta) LZ4_dict->hashTable[i]=0; + else LZ4_dict->hashTable[i] -= delta; + } + LZ4_dict->currentOffset = 64 KB; + if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB; + LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; + } +} + + +int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, + const char* source, char* dest, + int inputSize, int maxOutputSize, + int acceleration) +{ + const tableType_t tableType = byU32; + LZ4_stream_t_internal* const streamPtr = &LZ4_stream->internal_donotuse; + const char* dictEnd = streamPtr->dictSize ? (const char*)streamPtr->dictionary + streamPtr->dictSize : NULL; + + DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i, dictSize=%u)", inputSize, streamPtr->dictSize); + + LZ4_renormDictT(streamPtr, inputSize); /* fix index overflow */ + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + + /* invalidate tiny dictionaries */ + if ( (streamPtr->dictSize < 4) /* tiny dictionary : not enough for a hash */ + && (dictEnd != source) /* prefix mode */ + && (inputSize > 0) /* tolerance : don't lose history, in case next invocation would use prefix mode */ + && (streamPtr->dictCtx == NULL) /* usingDictCtx */ + ) { + DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, (void*)streamPtr->dictionary); + /* remove dictionary existence from history, to employ faster prefix mode */ + streamPtr->dictSize = 0; + streamPtr->dictionary = (const BYTE*)source; + dictEnd = source; + } + + /* Check overlapping input/dictionary space */ + { const char* const sourceEnd = source + inputSize; + if ((sourceEnd > (const char*)streamPtr->dictionary) && (sourceEnd < dictEnd)) { + streamPtr->dictSize = (U32)(dictEnd - sourceEnd); + if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB; + if (streamPtr->dictSize < 4) streamPtr->dictSize = 0; + streamPtr->dictionary = (const BYTE*)dictEnd - streamPtr->dictSize; + } + } + + /* prefix mode : source data follows dictionary */ + if (dictEnd == source) { + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) + return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration); + else + return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration); + } + + /* external dictionary mode */ + { int result; + if (streamPtr->dictCtx) { + /* We depend here on the fact that dictCtx'es (produced by + * LZ4_loadDict) guarantee that their tables contain no references + * to offsets between dictCtx->currentOffset - 64 KB and + * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe + * to use noDictIssue even when the dict isn't a full 64 KB. + */ + if (inputSize > 4 KB) { + /* For compressing large blobs, it is faster to pay the setup + * cost to copy the dictionary's tables into the active context, + * so that the compression loop is only looking into one table. + */ + LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr)); + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration); + } + } else { /* small data <= 4 KB */ + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); + } + } + streamPtr->dictionary = (const BYTE*)source; + streamPtr->dictSize = (U32)inputSize; + return result; + } +} + + +/* Hidden debug function, to force-test external dictionary mode */ +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize) +{ + LZ4_stream_t_internal* const streamPtr = &LZ4_dict->internal_donotuse; + int result; + + LZ4_renormDictT(streamPtr, srcSize); + + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1); + } + + streamPtr->dictionary = (const BYTE*)source; + streamPtr->dictSize = (U32)srcSize; + + return result; +} + + +/*! LZ4_saveDict() : + * If previously compressed data block is not guaranteed to remain available at its memory location, + * save it into a safer place (char* safeBuffer). + * Note : no need to call LZ4_loadDict() afterwards, dictionary is immediately usable, + * one can therefore call LZ4_compress_fast_continue() right after. + * @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error. + */ +int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) +{ + LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse; + + DEBUGLOG(5, "LZ4_saveDict : dictSize=%i, safeBuffer=%p", dictSize, (void*)safeBuffer); + + if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */ + if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; } + + if (safeBuffer == NULL) assert(dictSize == 0); + if (dictSize > 0) { + const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize; + assert(dict->dictionary); + LZ4_memmove(safeBuffer, previousDictEnd - dictSize, (size_t)dictSize); + } + + dict->dictionary = (const BYTE*)safeBuffer; + dict->dictSize = (U32)dictSize; + + return dictSize; +} + + + +/*-******************************* + * Decompression functions + ********************************/ + +typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; + +#undef MIN +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + + +/* variant for decompress_unsafe() + * does not know end of input + * presumes input is well formed + * note : will consume at least one byte */ +static size_t read_long_length_no_check(const BYTE** pp) +{ + size_t b, l = 0; + do { b = **pp; (*pp)++; l += b; } while (b==255); + DEBUGLOG(6, "read_long_length_no_check: +length=%zu using %zu input bytes", l, l/255 + 1) + return l; +} + +/* core decoder variant for LZ4_decompress_fast*() + * for legacy support only : these entry points are deprecated. + * - Presumes input is correctly formed (no defense vs malformed inputs) + * - Does not know input size (presume input buffer is "large enough") + * - Decompress a full block (only) + * @return : nb of bytes read from input. + * Note : this variant is not optimized for speed, just for maintenance. + * the goal is to remove support of decompress_fast*() variants by v2.0 +**/ +LZ4_FORCE_INLINE int +LZ4_decompress_unsafe_generic( + const BYTE* const istart, + BYTE* const ostart, + int decompressedSize, + + size_t prefixSize, + const BYTE* const dictStart, /* only if dict==usingExtDict */ + const size_t dictSize /* note: =0 if dictStart==NULL */ + ) +{ + const BYTE* ip = istart; + BYTE* op = (BYTE*)ostart; + BYTE* const oend = ostart + decompressedSize; + const BYTE* const prefixStart = ostart - prefixSize; + + DEBUGLOG(5, "LZ4_decompress_unsafe_generic"); + if (dictStart == NULL) assert(dictSize == 0); + + while (1) { + /* start new sequence */ + unsigned token = *ip++; + + /* literals */ + { size_t ll = token >> ML_BITS; + if (ll==15) { + /* long literal length */ + ll += read_long_length_no_check(&ip); + } + if ((size_t)(oend-op) < ll) return -1; /* output buffer overflow */ + LZ4_memmove(op, ip, ll); /* support in-place decompression */ + op += ll; + ip += ll; + if ((size_t)(oend-op) < MFLIMIT) { + if (op==oend) break; /* end of block */ + DEBUGLOG(5, "invalid: literals end at distance %zi from end of block", oend-op); + /* incorrect end of block : + * last match must start at least MFLIMIT==12 bytes before end of output block */ + return -1; + } } + + /* match */ + { size_t ml = token & 15; + size_t const offset = LZ4_readLE16(ip); + ip+=2; + + if (ml==15) { + /* long literal length */ + ml += read_long_length_no_check(&ip); + } + ml += MINMATCH; + + if ((size_t)(oend-op) < ml) return -1; /* output buffer overflow */ + + { const BYTE* match = op - offset; + + /* out of range */ + if (offset > (size_t)(op - prefixStart) + dictSize) { + DEBUGLOG(6, "offset out of range"); + return -1; + } + + /* check special case : extDict */ + if (offset > (size_t)(op - prefixStart)) { + /* extDict scenario */ + const BYTE* const dictEnd = dictStart + dictSize; + const BYTE* extMatch = dictEnd - (offset - (size_t)(op-prefixStart)); + size_t const extml = (size_t)(dictEnd - extMatch); + if (extml > ml) { + /* match entirely within extDict */ + LZ4_memmove(op, extMatch, ml); + op += ml; + ml = 0; + } else { + /* match split between extDict & prefix */ + LZ4_memmove(op, extMatch, extml); + op += extml; + ml -= extml; + } + match = prefixStart; + } + + /* match copy - slow variant, supporting overlap copy */ + { size_t u; + for (u=0; u= ipmax before start of loop. Returns initial_error if so. + * @error (output) - error code. Must be set to 0 before call. +**/ +typedef size_t Rvl_t; +static const Rvl_t rvl_error = (Rvl_t)(-1); +LZ4_FORCE_INLINE Rvl_t +read_variable_length(const BYTE** ip, const BYTE* ilimit, + int initial_check) +{ + Rvl_t s, length = 0; + assert(ip != NULL); + assert(*ip != NULL); + assert(ilimit != NULL); + if (initial_check && unlikely((*ip) >= ilimit)) { /* read limit reached */ + return rvl_error; + } + s = **ip; + (*ip)++; + length += s; + if (unlikely((*ip) > ilimit)) { /* read limit reached */ + return rvl_error; + } + /* accumulator overflow detection (32-bit mode only) */ + if ((sizeof(length) < 8) && unlikely(length > ((Rvl_t)(-1)/2)) ) { + return rvl_error; + } + if (likely(s != 255)) return length; + do { + s = **ip; + (*ip)++; + length += s; + if (unlikely((*ip) > ilimit)) { /* read limit reached */ + return rvl_error; + } + /* accumulator overflow detection (32-bit mode only) */ + if ((sizeof(length) < 8) && unlikely(length > ((Rvl_t)(-1)/2)) ) { + return rvl_error; + } + } while (s == 255); + + return length; +} + +/*! LZ4_decompress_generic() : + * This generic decompression function covers all use cases. + * It shall be instantiated several times, using different sets of directives. + * Note that it is important for performance that this function really get inlined, + * in order to remove useless branches during compilation optimization. + */ +LZ4_FORCE_INLINE int +LZ4_decompress_generic( + const char* const src, + char* const dst, + int srcSize, + int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */ + + earlyEnd_directive partialDecoding, /* full, partial */ + dict_directive dict, /* noDict, withPrefix64k, usingExtDict */ + const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */ + const BYTE* const dictStart, /* only if dict==usingExtDict */ + const size_t dictSize /* note : = 0 if noDict */ + ) +{ + if ((src == NULL) || (outputSize < 0)) { return -1; } + + { const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + + BYTE* op = (BYTE*) dst; + BYTE* const oend = op + outputSize; + BYTE* cpy; + + const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize; + + const int checkOffset = (dictSize < (int)(64 KB)); + + + /* Set up the "end" pointers for the shortcut. */ + const BYTE* const shortiend = iend - 14 /*maxLL*/ - 2 /*offset*/; + const BYTE* const shortoend = oend - 14 /*maxLL*/ - 18 /*maxML*/; + + const BYTE* match; + size_t offset; + unsigned token; + size_t length; + + + DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize); + + /* Special cases */ + assert(lowPrefix <= op); + if (unlikely(outputSize==0)) { + /* Empty output buffer */ + if (partialDecoding) return 0; + return ((srcSize==1) && (*ip==0)) ? 0 : -1; + } + if (unlikely(srcSize==0)) { return -1; } + + /* LZ4_FAST_DEC_LOOP: + * designed for modern OoO performance cpus, + * where copying reliably 32-bytes is preferable to an unpredictable branch. + * note : fast loop may show a regression for some client arm chips. */ +#if LZ4_FAST_DEC_LOOP + if ((oend - op) < FASTLOOP_SAFE_DISTANCE) { + DEBUGLOG(6, "move to safe decode loop"); + goto safe_decode; + } + + /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */ + DEBUGLOG(6, "using fast decode loop"); + while (1) { + /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */ + assert(oend - op >= FASTLOOP_SAFE_DISTANCE); + assert(ip < iend); + token = *ip++; + length = token >> ML_BITS; /* literal length */ + DEBUGLOG(7, "blockPos%6u: litLength token = %u", (unsigned)(op-(BYTE*)dst), (unsigned)length); + + /* decode literal length */ + if (length == RUN_MASK) { + size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1); + if (addl == rvl_error) { + DEBUGLOG(6, "error reading long literal length"); + goto _output_error; + } + length += addl; + if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ + if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ + + /* copy literals */ + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if ((op+length>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; } + LZ4_wildCopy32(op, ip, op+length); + ip += length; op += length; + } else if (ip <= iend-(16 + 1/*max lit + offset + nextToken*/)) { + /* We don't need to check oend, since we check it once for each loop below */ + DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length); + /* Literals can only be <= 14, but hope compilers optimize better when copy by a register size */ + LZ4_memcpy(op, ip, 16); + ip += length; op += length; + } else { + goto safe_literal_copy; + } + + /* get offset */ + offset = LZ4_readLE16(ip); ip+=2; + DEBUGLOG(6, "blockPos%6u: offset = %u", (unsigned)(op-(BYTE*)dst), (unsigned)offset); + match = op - offset; + assert(match <= op); /* overflow check */ + + /* get matchlength */ + length = token & ML_MASK; + DEBUGLOG(7, " match length token = %u (len==%u)", (unsigned)length, (unsigned)length+MINMATCH); + + if (length == ML_MASK) { + size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0); + if (addl == rvl_error) { + DEBUGLOG(5, "error reading long match length"); + goto _output_error; + } + length += addl; + length += MINMATCH; + DEBUGLOG(7, " long match length == %u", (unsigned)length); + if (unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */ + if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { + goto safe_match_copy; + } + } else { + length += MINMATCH; + if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { + DEBUGLOG(7, "moving to safe_match_copy (ml==%u)", (unsigned)length); + goto safe_match_copy; + } + + /* Fastpath check: skip LZ4_wildCopy32 when true */ + if ((dict == withPrefix64k) || (match >= lowPrefix)) { + if (offset >= 8) { + assert(match >= lowPrefix); + assert(match <= op); + assert(op + 18 <= oend); + + LZ4_memcpy(op, match, 8); + LZ4_memcpy(op+8, match+8, 8); + LZ4_memcpy(op+16, match+16, 2); + op += length; + continue; + } } } + + if ( checkOffset && (unlikely(match + dictSize < lowPrefix)) ) { + DEBUGLOG(5, "Error : pos=%zi, offset=%zi => outside buffers", op-lowPrefix, op-match); + goto _output_error; + } + /* match starting within external dictionary */ + if ((dict==usingExtDict) && (match < lowPrefix)) { + assert(dictEnd != NULL); + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) { + DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd"); + length = MIN(length, (size_t)(oend-op)); + } else { + DEBUGLOG(6, "end-of-block condition violated") + goto _output_error; + } } + + if (length <= (size_t)(lowPrefix-match)) { + /* match fits entirely within external dictionary : just copy */ + LZ4_memmove(op, dictEnd - (lowPrefix-match), length); + op += length; + } else { + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); + size_t const restSize = length - copySize; + LZ4_memcpy(op, dictEnd - copySize, copySize); + op += copySize; + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ + BYTE* const endOfMatch = op + restSize; + const BYTE* copyFrom = lowPrefix; + while (op < endOfMatch) { *op++ = *copyFrom++; } + } else { + LZ4_memcpy(op, lowPrefix, restSize); + op += restSize; + } } + continue; + } + + /* copy match within block */ + cpy = op + length; + + assert((op <= oend) && (oend-op >= 32)); + if (unlikely(offset<16)) { + LZ4_memcpy_using_offset(op, match, cpy, offset); + } else { + LZ4_wildCopy32(op, match, cpy); + } + + op = cpy; /* wildcopy correction */ + } + safe_decode: +#endif + + /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */ + DEBUGLOG(6, "using safe decode loop"); + while (1) { + assert(ip < iend); + token = *ip++; + length = token >> ML_BITS; /* literal length */ + DEBUGLOG(7, "blockPos%6u: litLength token = %u", (unsigned)(op-(BYTE*)dst), (unsigned)length); + + /* A two-stage shortcut for the most common case: + * 1) If the literal length is 0..14, and there is enough space, + * enter the shortcut and copy 16 bytes on behalf of the literals + * (in the fast mode, only 8 bytes can be safely copied this way). + * 2) Further if the match length is 4..18, copy 18 bytes in a similar + * manner; but we ensure that there's enough space in the output for + * those 18 bytes earlier, upon entering the shortcut (in other words, + * there is a combined check for both stages). + */ + if ( (length != RUN_MASK) + /* strictly "less than" on input, to re-enter the loop with at least one byte */ + && likely((ip < shortiend) & (op <= shortoend)) ) { + /* Copy the literals */ + LZ4_memcpy(op, ip, 16); + op += length; ip += length; + + /* The second stage: prepare for match copying, decode full info. + * If it doesn't work out, the info won't be wasted. */ + length = token & ML_MASK; /* match length */ + DEBUGLOG(7, "blockPos%6u: matchLength token = %u (len=%u)", (unsigned)(op-(BYTE*)dst), (unsigned)length, (unsigned)length + 4); + offset = LZ4_readLE16(ip); ip += 2; + match = op - offset; + assert(match <= op); /* check overflow */ + + /* Do not deal with overlapping matches. */ + if ( (length != ML_MASK) + && (offset >= 8) + && (dict==withPrefix64k || match >= lowPrefix) ) { + /* Copy the match. */ + LZ4_memcpy(op + 0, match + 0, 8); + LZ4_memcpy(op + 8, match + 8, 8); + LZ4_memcpy(op +16, match +16, 2); + op += length + MINMATCH; + /* Both stages worked, load the next token. */ + continue; + } + + /* The second stage didn't work out, but the info is ready. + * Propel it right to the point of match copying. */ + goto _copy_match; + } + + /* decode literal length */ + if (length == RUN_MASK) { + size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1); + if (addl == rvl_error) { goto _output_error; } + length += addl; + if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ + if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ + } + +#if LZ4_FAST_DEC_LOOP + safe_literal_copy: +#endif + /* copy literals */ + cpy = op+length; + + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) { + /* We've either hit the input parsing restriction or the output parsing restriction. + * In the normal scenario, decoding a full block, it must be the last sequence, + * otherwise it's an error (invalid input or dimensions). + * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow. + */ + if (partialDecoding) { + /* Since we are partial decoding we may be in this block because of the output parsing + * restriction, which is not valid since the output buffer is allowed to be undersized. + */ + DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end") + DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length); + DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op)); + DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip)); + /* Finishing in the middle of a literals segment, + * due to lack of input. + */ + if (ip+length > iend) { + length = (size_t)(iend-ip); + cpy = op + length; + } + /* Finishing in the middle of a literals segment, + * due to lack of output space. + */ + if (cpy > oend) { + cpy = oend; + assert(op<=oend); + length = (size_t)(oend-op); + } + } else { + /* We must be on the last sequence (or invalid) because of the parsing limitations + * so check that we exactly consume the input and don't overrun the output buffer. + */ + if ((ip+length != iend) || (cpy > oend)) { + DEBUGLOG(5, "should have been last run of literals") + DEBUGLOG(5, "ip(%p) + length(%i) = %p != iend (%p)", (void*)ip, (int)length, (void*)(ip+length), (void*)iend); + DEBUGLOG(5, "or cpy(%p) > (oend-MFLIMIT)(%p)", (void*)cpy, (void*)(oend-MFLIMIT)); + DEBUGLOG(5, "after writing %u bytes / %i bytes available", (unsigned)(op-(BYTE*)dst), outputSize); + goto _output_error; + } + } + LZ4_memmove(op, ip, length); /* supports overlapping memory regions, for in-place decompression scenarios */ + ip += length; + op += length; + /* Necessarily EOF when !partialDecoding. + * When partialDecoding, it is EOF if we've either + * filled the output buffer or + * can't proceed with reading an offset for following match. + */ + if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) { + break; + } + } else { + LZ4_wildCopy8(op, ip, cpy); /* can overwrite up to 8 bytes beyond cpy */ + ip += length; op = cpy; + } + + /* get offset */ + offset = LZ4_readLE16(ip); ip+=2; + match = op - offset; + + /* get matchlength */ + length = token & ML_MASK; + DEBUGLOG(7, "blockPos%6u: matchLength token = %u", (unsigned)(op-(BYTE*)dst), (unsigned)length); + + _copy_match: + if (length == ML_MASK) { + size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0); + if (addl == rvl_error) { goto _output_error; } + length += addl; + if (unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */ + } + length += MINMATCH; + +#if LZ4_FAST_DEC_LOOP + safe_match_copy: +#endif + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ + /* match starting within external dictionary */ + if ((dict==usingExtDict) && (match < lowPrefix)) { + assert(dictEnd != NULL); + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) length = MIN(length, (size_t)(oend-op)); + else goto _output_error; /* doesn't respect parsing restriction */ + } + + if (length <= (size_t)(lowPrefix-match)) { + /* match fits entirely within external dictionary : just copy */ + LZ4_memmove(op, dictEnd - (lowPrefix-match), length); + op += length; + } else { + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); + size_t const restSize = length - copySize; + LZ4_memcpy(op, dictEnd - copySize, copySize); + op += copySize; + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ + BYTE* const endOfMatch = op + restSize; + const BYTE* copyFrom = lowPrefix; + while (op < endOfMatch) *op++ = *copyFrom++; + } else { + LZ4_memcpy(op, lowPrefix, restSize); + op += restSize; + } } + continue; + } + assert(match >= lowPrefix); + + /* copy match within block */ + cpy = op + length; + + /* partialDecoding : may end anywhere within the block */ + assert(op<=oend); + if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + size_t const mlen = MIN(length, (size_t)(oend-op)); + const BYTE* const matchEnd = match + mlen; + BYTE* const copyEnd = op + mlen; + if (matchEnd > op) { /* overlap copy */ + while (op < copyEnd) { *op++ = *match++; } + } else { + LZ4_memcpy(op, match, mlen); + } + op = copyEnd; + if (op == oend) { break; } + continue; + } + + if (unlikely(offset<8)) { + LZ4_write32(op, 0); /* silence msan warning when offset==0 */ + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += inc32table[offset]; + LZ4_memcpy(op+4, match, 4); + match -= dec64table[offset]; + } else { + LZ4_memcpy(op, match, 8); + match += 8; + } + op += 8; + + if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1); + if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ + if (op < oCopyLimit) { + LZ4_wildCopy8(op, match, oCopyLimit); + match += oCopyLimit - op; + op = oCopyLimit; + } + while (op < cpy) { *op++ = *match++; } + } else { + LZ4_memcpy(op, match, 8); + if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); } + } + op = cpy; /* wildcopy correction */ + } + + /* end of decoding */ + DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst)); + return (int) (((char*)op)-dst); /* Nb of output bytes decoded */ + + /* Overflow error detected */ + _output_error: + return (int) (-(((const char*)ip)-src))-1; + } +} + + +/*===== Instantiate the API decoding functions. =====*/ + +LZ4_FORCE_O2 +int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, + decode_full_block, noDict, + (BYTE*)dest, NULL, 0); +} + +LZ4_FORCE_O2 +int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity) +{ + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, + partial_decode, + noDict, (BYTE*)dst, NULL, 0); +} + +LZ4_FORCE_O2 +int LZ4_decompress_fast(const char* source, char* dest, int originalSize) +{ + DEBUGLOG(5, "LZ4_decompress_fast"); + return LZ4_decompress_unsafe_generic( + (const BYTE*)source, (BYTE*)dest, originalSize, + 0, NULL, 0); +} + +/*===== Instantiate a few more decoding cases, used more than once. =====*/ + +LZ4_FORCE_O2 /* Exported, an obsolete API function. */ +int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + decode_full_block, withPrefix64k, + (BYTE*)dest - 64 KB, NULL, 0); +} + +LZ4_FORCE_O2 +static int LZ4_decompress_safe_partial_withPrefix64k(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity) +{ + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity, + partial_decode, withPrefix64k, + (BYTE*)dest - 64 KB, NULL, 0); +} + +/* Another obsolete API function, paired with the previous one. */ +int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) +{ + return LZ4_decompress_unsafe_generic( + (const BYTE*)source, (BYTE*)dest, originalSize, + 64 KB, NULL, 0); +} + +LZ4_FORCE_O2 +static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize, + size_t prefixSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + decode_full_block, noDict, + (BYTE*)dest-prefixSize, NULL, 0); +} + +LZ4_FORCE_O2 +static int LZ4_decompress_safe_partial_withSmallPrefix(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity, + size_t prefixSize) +{ + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity, + partial_decode, noDict, + (BYTE*)dest-prefixSize, NULL, 0); +} + +LZ4_FORCE_O2 +int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, + int compressedSize, int maxOutputSize, + const void* dictStart, size_t dictSize) +{ + DEBUGLOG(5, "LZ4_decompress_safe_forceExtDict"); + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + decode_full_block, usingExtDict, + (BYTE*)dest, (const BYTE*)dictStart, dictSize); +} + +LZ4_FORCE_O2 +int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest, + int compressedSize, int targetOutputSize, int dstCapacity, + const void* dictStart, size_t dictSize) +{ + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity, + partial_decode, usingExtDict, + (BYTE*)dest, (const BYTE*)dictStart, dictSize); +} + +LZ4_FORCE_O2 +static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize, + const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_unsafe_generic( + (const BYTE*)source, (BYTE*)dest, originalSize, + 0, (const BYTE*)dictStart, dictSize); +} + +/* The "double dictionary" mode, for use with e.g. ring buffers: the first part + * of the dictionary is passed as prefix, and the second via dictStart + dictSize. + * These routines are used only once, in LZ4_decompress_*_continue(). + */ +LZ4_FORCE_INLINE +int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize, + size_t prefixSize, const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + decode_full_block, usingExtDict, + (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); +} + +/*===== streaming decompression functions =====*/ + +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +LZ4_streamDecode_t* LZ4_createStreamDecode(void) +{ + LZ4_STATIC_ASSERT(sizeof(LZ4_streamDecode_t) >= sizeof(LZ4_streamDecode_t_internal)); + return (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t)); +} + +int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) +{ + if (LZ4_stream == NULL) { return 0; } /* support free on NULL */ + FREEMEM(LZ4_stream); + return 0; +} +#endif + +/*! LZ4_setStreamDecode() : + * Use this function to instruct where to find the dictionary. + * This function is not necessary if previous data is still available where it was decoded. + * Loading a size of 0 is allowed (same effect as no dictionary). + * @return : 1 if OK, 0 if error + */ +int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + lz4sd->prefixSize = (size_t)dictSize; + if (dictSize) { + assert(dictionary != NULL); + lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize; + } else { + lz4sd->prefixEnd = (const BYTE*) dictionary; + } + lz4sd->externalDict = NULL; + lz4sd->extDictSize = 0; + return 1; +} + +/*! LZ4_decoderRingBufferSize() : + * when setting a ring buffer for streaming decompression (optional scenario), + * provides the minimum size of this ring buffer + * to be compatible with any source respecting maxBlockSize condition. + * Note : in a ring buffer scenario, + * blocks are presumed decompressed next to each other. + * When not enough space remains for next block (remainingSize < maxBlockSize), + * decoding resumes from beginning of ring buffer. + * @return : minimum ring buffer size, + * or 0 if there is an error (invalid maxBlockSize). + */ +int LZ4_decoderRingBufferSize(int maxBlockSize) +{ + if (maxBlockSize < 0) return 0; + if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0; + if (maxBlockSize < 16) maxBlockSize = 16; + return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize); +} + +/* +*_continue() : + These decoding functions allow decompression of multiple blocks in "streaming" mode. + Previously decoded blocks must still be available at the memory position where they were decoded. + If it's not possible, save the relevant part of decoded data into a safe buffer, + and indicate where it stands using LZ4_setStreamDecode() +*/ +LZ4_FORCE_O2 +int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + int result; + + if (lz4sd->prefixSize == 0) { + /* The first call, no dictionary yet. */ + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)result; + lz4sd->prefixEnd = (BYTE*)dest + result; + } else if (lz4sd->prefixEnd == (BYTE*)dest) { + /* They're rolling the current segment. */ + if (lz4sd->prefixSize >= 64 KB - 1) + result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); + else if (lz4sd->extDictSize == 0) + result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, + lz4sd->prefixSize); + else + result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize, + lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize += (size_t)result; + lz4sd->prefixEnd += result; + } else { + /* The buffer wraps around, or they're switching to another buffer. */ + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)result; + lz4sd->prefixEnd = (BYTE*)dest + result; + } + + return result; +} + +LZ4_FORCE_O2 int +LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, + const char* source, char* dest, int originalSize) +{ + LZ4_streamDecode_t_internal* const lz4sd = + (assert(LZ4_streamDecode!=NULL), &LZ4_streamDecode->internal_donotuse); + int result; + + DEBUGLOG(5, "LZ4_decompress_fast_continue (toDecodeSize=%i)", originalSize); + assert(originalSize >= 0); + + if (lz4sd->prefixSize == 0) { + DEBUGLOG(5, "first invocation : no prefix nor extDict"); + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_fast(source, dest, originalSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)originalSize; + lz4sd->prefixEnd = (BYTE*)dest + originalSize; + } else if (lz4sd->prefixEnd == (BYTE*)dest) { + DEBUGLOG(5, "continue using existing prefix"); + result = LZ4_decompress_unsafe_generic( + (const BYTE*)source, (BYTE*)dest, originalSize, + lz4sd->prefixSize, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize += (size_t)originalSize; + lz4sd->prefixEnd += originalSize; + } else { + DEBUGLOG(5, "prefix becomes extDict"); + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_fast_extDict(source, dest, originalSize, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)originalSize; + lz4sd->prefixEnd = (BYTE*)dest + originalSize; + } + + return result; +} + + +/* +Advanced decoding functions : +*_usingDict() : + These decoding functions work the same as "_continue" ones, + the dictionary must be explicitly provided within parameters +*/ + +int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) +{ + if (dictSize==0) + return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); + if (dictStart+dictSize == dest) { + if (dictSize >= 64 KB - 1) { + return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize); +} + +int LZ4_decompress_safe_partial_usingDict(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity, const char* dictStart, int dictSize) +{ + if (dictSize==0) + return LZ4_decompress_safe_partial(source, dest, compressedSize, targetOutputSize, dstCapacity); + if (dictStart+dictSize == dest) { + if (dictSize >= 64 KB - 1) { + return LZ4_decompress_safe_partial_withPrefix64k(source, dest, compressedSize, targetOutputSize, dstCapacity); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_partial_withSmallPrefix(source, dest, compressedSize, targetOutputSize, dstCapacity, (size_t)dictSize); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_partial_forceExtDict(source, dest, compressedSize, targetOutputSize, dstCapacity, dictStart, (size_t)dictSize); +} + +int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) +{ + if (dictSize==0 || dictStart+dictSize == dest) + return LZ4_decompress_unsafe_generic( + (const BYTE*)source, (BYTE*)dest, originalSize, + (size_t)dictSize, NULL, 0); + assert(dictSize >= 0); + return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize); +} + + +/*=************************************************* +* Obsolete Functions +***************************************************/ +/* obsolete compression functions */ +int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) +{ + return LZ4_compress_default(source, dest, inputSize, maxOutputSize); +} +int LZ4_compress(const char* src, char* dest, int srcSize) +{ + return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize)); +} +int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) +{ + return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); +} +int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) +{ + return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); +} +int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity) +{ + return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1); +} +int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) +{ + return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); +} + +/* +These decompression functions are deprecated and should no longer be used. +They are only provided here for compatibility with older user programs. +- LZ4_uncompress is totally equivalent to LZ4_decompress_fast +- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe +*/ +int LZ4_uncompress (const char* source, char* dest, int outputSize) +{ + return LZ4_decompress_fast(source, dest, outputSize); +} +int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) +{ + return LZ4_decompress_safe(source, dest, isize, maxOutputSize); +} + +/* Obsolete Streaming functions */ + +int LZ4_sizeofStreamState(void) { return sizeof(LZ4_stream_t); } + +int LZ4_resetStreamState(void* state, char* inputBuffer) +{ + (void)inputBuffer; + LZ4_resetStream((LZ4_stream_t*)state); + return 0; +} + +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +void* LZ4_create (char* inputBuffer) +{ + (void)inputBuffer; + return LZ4_createStream(); +} +#endif + +char* LZ4_slideInputBuffer (void* state) +{ + /* avoid const char * -> char * conversion warning */ + return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary; +} + +#endif /* LZ4_COMMONDEFS_ONLY */ diff --git a/lz4/lz4.h b/lz4/lz4.h new file mode 100644 index 00000000..6ad099d3 --- /dev/null +++ b/lz4/lz4.h @@ -0,0 +1,886 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Header File + * Copyright (c) Yann Collet. All rights reserved. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repository : https://github.com/lz4/lz4 +*/ +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef LZ4_H_2983827168210 +#define LZ4_H_2983827168210 + +/* --- Dependency --- */ +#include /* size_t */ + + +/** + Introduction + + LZ4 is lossless compression algorithm, providing compression speed >500 MB/s per core, + scalable with multi-cores CPU. It features an extremely fast decoder, with speed in + multiple GB/s per core, typically reaching RAM speed limits on multi-core systems. + + The LZ4 compression library provides in-memory compression and decompression functions. + It gives full buffer control to user. + Compression can be done in: + - a single step (described as Simple Functions) + - a single step, reusing a context (described in Advanced Functions) + - unbounded multiple steps (described as Streaming compression) + + lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md). + Decompressing such a compressed block requires additional metadata. + Exact metadata depends on exact decompression function. + For the typical case of LZ4_decompress_safe(), + metadata includes block's compressed size, and maximum bound of decompressed size. + Each application is free to encode and pass such metadata in whichever way it wants. + + lz4.h only handle blocks, it can not generate Frames. + + Blocks are different from Frames (doc/lz4_Frame_format.md). + Frames bundle both blocks and metadata in a specified manner. + Embedding metadata is required for compressed data to be self-contained and portable. + Frame format is delivered through a companion API, declared in lz4frame.h. + The `lz4` CLI can only manage frames. +*/ + +/*^*************************************************************** +* Export parameters +*****************************************************************/ +/* +* LZ4_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +* LZ4LIB_VISIBILITY : +* Control library symbols visibility. +*/ +#ifndef LZ4LIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define LZ4LIB_VISIBILITY +# endif +#endif +#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1) +# define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY +#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1) +# define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define LZ4LIB_API LZ4LIB_VISIBILITY +#endif + +/*! LZ4_FREESTANDING : + * When this macro is set to 1, it enables "freestanding mode" that is + * suitable for typical freestanding environment which doesn't support + * standard C library. + * + * - LZ4_FREESTANDING is a compile-time switch. + * - It requires the following macros to be defined: + * LZ4_memcpy, LZ4_memmove, LZ4_memset. + * - It only enables LZ4/HC functions which don't use heap. + * All LZ4F_* functions are not supported. + * - See tests/freestanding.c to check its basic setup. + */ +#if defined(LZ4_FREESTANDING) && (LZ4_FREESTANDING == 1) +# define LZ4_HEAPMODE 0 +# define LZ4HC_HEAPMODE 0 +# define LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION 1 +# if !defined(LZ4_memcpy) +# error "LZ4_FREESTANDING requires macro 'LZ4_memcpy'." +# endif +# if !defined(LZ4_memset) +# error "LZ4_FREESTANDING requires macro 'LZ4_memset'." +# endif +# if !defined(LZ4_memmove) +# error "LZ4_FREESTANDING requires macro 'LZ4_memmove'." +# endif +#elif ! defined(LZ4_FREESTANDING) +# define LZ4_FREESTANDING 0 +#endif + + +/*------ Version ------*/ +#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ +#define LZ4_VERSION_MINOR 10 /* for new (non-breaking) interface capabilities */ +#define LZ4_VERSION_RELEASE 0 /* for tweaks, bug-fixes, or development */ + +#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) + +#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE +#define LZ4_QUOTE(str) #str +#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str) +#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION) /* requires v1.7.3+ */ + +LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; useful to check dll version; requires v1.3.0+ */ +LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; useful to check dll version; requires v1.7.5+ */ + + +/*-************************************ +* Tuning memory usage +**************************************/ +/*! + * LZ4_MEMORY_USAGE : + * Can be selected at compile time, by setting LZ4_MEMORY_USAGE. + * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB) + * Increasing memory usage improves compression ratio, generally at the cost of speed. + * Reduced memory usage may improve speed at the cost of ratio, thanks to better cache locality. + * Default value is 14, for 16KB, which nicely fits into most L1 caches. + */ +#ifndef LZ4_MEMORY_USAGE +# define LZ4_MEMORY_USAGE LZ4_MEMORY_USAGE_DEFAULT +#endif + +/* These are absolute limits, they should not be changed by users */ +#define LZ4_MEMORY_USAGE_MIN 10 +#define LZ4_MEMORY_USAGE_DEFAULT 14 +#define LZ4_MEMORY_USAGE_MAX 20 + +#if (LZ4_MEMORY_USAGE < LZ4_MEMORY_USAGE_MIN) +# error "LZ4_MEMORY_USAGE is too small !" +#endif + +#if (LZ4_MEMORY_USAGE > LZ4_MEMORY_USAGE_MAX) +# error "LZ4_MEMORY_USAGE is too large !" +#endif + +/*-************************************ +* Simple Functions +**************************************/ +/*! LZ4_compress_default() : + * Compresses 'srcSize' bytes from buffer 'src' + * into already allocated 'dst' buffer of size 'dstCapacity'. + * Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize). + * It also runs faster, so it's a recommended setting. + * If the function cannot compress 'src' into a more limited 'dst' budget, + * compression stops *immediately*, and the function result is zero. + * In which case, 'dst' content is undefined (invalid). + * srcSize : max supported value is LZ4_MAX_INPUT_SIZE. + * dstCapacity : size of buffer 'dst' (which must be already allocated) + * @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity) + * or 0 if compression fails + * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer). + */ +LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity); + +/*! LZ4_decompress_safe() : + * @compressedSize : is the exact complete size of the compressed block. + * @dstCapacity : is the size of destination buffer (which must be already allocated), + * presumed an upper bound of decompressed size. + * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity) + * If destination buffer is not large enough, decoding will stop and output an error code (negative value). + * If the source stream is detected malformed, the function will stop decoding and return a negative result. + * Note 1 : This function is protected against malicious data packets : + * it will never writes outside 'dst' buffer, nor read outside 'source' buffer, + * even if the compressed block is maliciously modified to order the decoder to do these actions. + * In such case, the decoder stops immediately, and considers the compressed block malformed. + * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them. + * The implementation is free to send / store / derive this information in whichever way is most beneficial. + * If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead. + */ +LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity); + + +/*-************************************ +* Advanced Functions +**************************************/ +#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ +#define LZ4__COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) + +/*! LZ4_compressBound() : + Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible) + This function is primarily useful for memory allocation purposes (destination buffer size). + Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example). + Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize) + inputSize : max supported value is LZ4_MAX_INPUT_SIZE + return : maximum output size in a "worst case" scenario + or 0, if input size is incorrect (too large or negative) +*/ +LZ4LIB_API int LZ4_compressBound(int inputSize); + +/*! LZ4_compress_fast() : + Same as LZ4_compress_default(), but allows selection of "acceleration" factor. + The larger the acceleration value, the faster the algorithm, but also the lesser the compression. + It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed. + An acceleration value of "1" is the same as regular LZ4_compress_default() + Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c). + Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c). +*/ +LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + + +/*! LZ4_compress_fast_extState() : + * Same as LZ4_compress_fast(), using an externally allocated memory space for its state. + * Use LZ4_sizeofState() to know how much memory must be allocated, + * and allocate it on 8-bytes boundaries (using `malloc()` typically). + * Then, provide this buffer as `void* state` to compression function. + */ +LZ4LIB_API int LZ4_sizeofState(void); +LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + +/*! LZ4_compress_destSize() : + * Reverse the logic : compresses as much data as possible from 'src' buffer + * into already allocated buffer 'dst', of size >= 'dstCapacity'. + * This function either compresses the entire 'src' content into 'dst' if it's large enough, + * or fill 'dst' buffer completely with as much data as possible from 'src'. + * note: acceleration parameter is fixed to "default". + * + * *srcSizePtr : in+out parameter. Initially contains size of input. + * Will be modified to indicate how many bytes where read from 'src' to fill 'dst'. + * New value is necessarily <= input value. + * @return : Nb bytes written into 'dst' (necessarily <= dstCapacity) + * or 0 if compression fails. + * + * Note : 'targetDstSize' must be >= 1, because it's the smallest valid lz4 payload. + * + * Note 2:from v1.8.2 to v1.9.1, this function had a bug (fixed in v1.9.2+): + * the produced compressed content could, in rare circumstances, + * require to be decompressed into a destination buffer + * larger by at least 1 byte than decompressesSize. + * If an application uses `LZ4_compress_destSize()`, + * it's highly recommended to update liblz4 to v1.9.2 or better. + * If this can't be done or ensured, + * the receiving decompression function should provide + * a dstCapacity which is > decompressedSize, by at least 1 byte. + * See https://github.com/lz4/lz4/issues/859 for details + */ +LZ4LIB_API int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize); + +/*! LZ4_decompress_safe_partial() : + * Decompress an LZ4 compressed block, of size 'srcSize' at position 'src', + * into destination buffer 'dst' of size 'dstCapacity'. + * Up to 'targetOutputSize' bytes will be decoded. + * The function stops decoding on reaching this objective. + * This can be useful to boost performance + * whenever only the beginning of a block is required. + * + * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize) + * If source stream is detected malformed, function returns a negative result. + * + * Note 1 : @return can be < targetOutputSize, if compressed block contains less data. + * + * Note 2 : targetOutputSize must be <= dstCapacity + * + * Note 3 : this function effectively stops decoding on reaching targetOutputSize, + * so dstCapacity is kind of redundant. + * This is because in older versions of this function, + * decoding operation would still write complete sequences. + * Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize, + * it could write more bytes, though only up to dstCapacity. + * Some "margin" used to be required for this operation to work properly. + * Thankfully, this is no longer necessary. + * The function nonetheless keeps the same signature, in an effort to preserve API compatibility. + * + * Note 4 : If srcSize is the exact size of the block, + * then targetOutputSize can be any value, + * including larger than the block's decompressed size. + * The function will, at most, generate block's decompressed size. + * + * Note 5 : If srcSize is _larger_ than block's compressed size, + * then targetOutputSize **MUST** be <= block's decompressed size. + * Otherwise, *silent corruption will occur*. + */ +LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity); + + +/*-********************************************* +* Streaming Compression Functions +***********************************************/ +typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */ + +/*! + Note about RC_INVOKED + + - RC_INVOKED is predefined symbol of rc.exe (the resource compiler which is part of MSVC/Visual Studio). + https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros + + - Since rc.exe is a legacy compiler, it truncates long symbol (> 30 chars) + and reports warning "RC4011: identifier truncated". + + - To eliminate the warning, we surround long preprocessor symbol with + "#if !defined(RC_INVOKED) ... #endif" block that means + "skip this block when rc.exe is trying to read it". +*/ +#if !defined(RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros */ +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +LZ4LIB_API LZ4_stream_t* LZ4_createStream(void); +LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr); +#endif /* !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) */ +#endif + +/*! LZ4_resetStream_fast() : v1.9.0+ + * Use this to prepare an LZ4_stream_t for a new chain of dependent blocks + * (e.g., LZ4_compress_fast_continue()). + * + * An LZ4_stream_t must be initialized once before usage. + * This is automatically done when created by LZ4_createStream(). + * However, should the LZ4_stream_t be simply declared on stack (for example), + * it's necessary to initialize it first, using LZ4_initStream(). + * + * After init, start any new stream with LZ4_resetStream_fast(). + * A same LZ4_stream_t can be re-used multiple times consecutively + * and compress multiple streams, + * provided that it starts each new stream with LZ4_resetStream_fast(). + * + * LZ4_resetStream_fast() is much faster than LZ4_initStream(), + * but is not compatible with memory regions containing garbage data. + * + * Note: it's only useful to call LZ4_resetStream_fast() + * in the context of streaming compression. + * The *extState* functions perform their own resets. + * Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive. + */ +LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr); + +/*! LZ4_loadDict() : + * Use this function to reference a static dictionary into LZ4_stream_t. + * The dictionary must remain available during compression. + * LZ4_loadDict() triggers a reset, so any previous data will be forgotten. + * The same dictionary will have to be loaded on decompression side for successful decoding. + * Dictionary are useful for better compression of small data (KB range). + * While LZ4 itself accepts any input as dictionary, dictionary efficiency is also a topic. + * When in doubt, employ the Zstandard's Dictionary Builder. + * Loading a size of 0 is allowed, and is the same as reset. + * @return : loaded dictionary size, in bytes (note: only the last 64 KB are loaded) + */ +LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize); + +/*! LZ4_loadDictSlow() : v1.10.0+ + * Same as LZ4_loadDict(), + * but uses a bit more cpu to reference the dictionary content more thoroughly. + * This is expected to slightly improve compression ratio. + * The extra-cpu cost is likely worth it if the dictionary is re-used across multiple sessions. + * @return : loaded dictionary size, in bytes (note: only the last 64 KB are loaded) + */ +LZ4LIB_API int LZ4_loadDictSlow(LZ4_stream_t* streamPtr, const char* dictionary, int dictSize); + +/*! LZ4_attach_dictionary() : stable since v1.10.0 + * + * This allows efficient re-use of a static dictionary multiple times. + * + * Rather than re-loading the dictionary buffer into a working context before + * each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a + * working LZ4_stream_t, this function introduces a no-copy setup mechanism, + * in which the working stream references @dictionaryStream in-place. + * + * Several assumptions are made about the state of @dictionaryStream. + * Currently, only states which have been prepared by LZ4_loadDict() or + * LZ4_loadDictSlow() should be expected to work. + * + * Alternatively, the provided @dictionaryStream may be NULL, + * in which case any existing dictionary stream is unset. + * + * If a dictionary is provided, it replaces any pre-existing stream history. + * The dictionary contents are the only history that can be referenced and + * logically immediately precede the data compressed in the first subsequent + * compression call. + * + * The dictionary will only remain attached to the working stream through the + * first compression call, at the end of which it is cleared. + * @dictionaryStream stream (and source buffer) must remain in-place / accessible / unchanged + * through the completion of the compression session. + * + * Note: there is no equivalent LZ4_attach_*() method on the decompression side + * because there is no initialization cost, hence no need to share the cost across multiple sessions. + * To decompress LZ4 blocks using dictionary, attached or not, + * just employ the regular LZ4_setStreamDecode() for streaming, + * or the stateless LZ4_decompress_safe_usingDict() for one-shot decompression. + */ +LZ4LIB_API void +LZ4_attach_dictionary(LZ4_stream_t* workingStream, + const LZ4_stream_t* dictionaryStream); + +/*! LZ4_compress_fast_continue() : + * Compress 'src' content using data from previously compressed blocks, for better compression ratio. + * 'dst' buffer must be already allocated. + * If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. + * + * @return : size of compressed block + * or 0 if there is an error (typically, cannot fit into 'dst'). + * + * Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block. + * Each block has precise boundaries. + * Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata. + * It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together. + * + * Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory ! + * + * Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB. + * Make sure that buffers are separated, by at least one byte. + * This construction ensures that each block only depends on previous block. + * + * Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB. + * + * Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed. + */ +LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + +/*! LZ4_saveDict() : + * If last 64KB data cannot be guaranteed to remain available at its current memory location, + * save it into a safer place (char* safeBuffer). + * This is schematically equivalent to a memcpy() followed by LZ4_loadDict(), + * but is much faster, because LZ4_saveDict() doesn't need to rebuild tables. + * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error. + */ +LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize); + + +/*-********************************************** +* Streaming Decompression Functions +* Bufferless synchronous API +************************************************/ +typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */ + +/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() : + * creation / destruction of streaming decompression tracking context. + * A tracking context can be re-used multiple times. + */ +#if !defined(RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros */ +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void); +LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream); +#endif /* !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) */ +#endif + +/*! LZ4_setStreamDecode() : + * An LZ4_streamDecode_t context can be allocated once and re-used multiple times. + * Use this function to start decompression of a new stream of blocks. + * A dictionary can optionally be set. Use NULL or size 0 for a reset order. + * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. + * @return : 1 if OK, 0 if error + */ +LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); + +/*! LZ4_decoderRingBufferSize() : v1.8.2+ + * Note : in a ring buffer scenario (optional), + * blocks are presumed decompressed next to each other + * up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize), + * at which stage it resumes from beginning of ring buffer. + * When setting such a ring buffer for streaming decompression, + * provides the minimum size of this ring buffer + * to be compatible with any source respecting maxBlockSize condition. + * @return : minimum ring buffer size, + * or 0 if there is an error (invalid maxBlockSize). + */ +LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize); +#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize)) /* for static allocation; maxBlockSize presumed valid */ + +/*! LZ4_decompress_safe_continue() : + * This decoding function allows decompression of consecutive blocks in "streaming" mode. + * The difference with the usual independent blocks is that + * new blocks are allowed to find references into former blocks. + * A block is an unsplittable entity, and must be presented entirely to the decompression function. + * LZ4_decompress_safe_continue() only accepts one block at a time. + * It's modeled after `LZ4_decompress_safe()` and behaves similarly. + * + * @LZ4_streamDecode : decompression state, tracking the position in memory of past data + * @compressedSize : exact complete size of one compressed block. + * @dstCapacity : size of destination buffer (which must be already allocated), + * must be an upper bound of decompressed size. + * @return : number of bytes decompressed into destination buffer (necessarily <= dstCapacity) + * If destination buffer is not large enough, decoding will stop and output an error code (negative value). + * If the source stream is detected malformed, the function will stop decoding and return a negative result. + * + * The last 64KB of previously decoded data *must* remain available and unmodified + * at the memory position where they were previously decoded. + * If less than 64KB of data has been decoded, all the data must be present. + * + * Special : if decompression side sets a ring buffer, it must respect one of the following conditions : + * - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize). + * maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes. + * In which case, encoding and decoding buffers do not need to be synchronized. + * Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize. + * - Synchronized mode : + * Decompression buffer size is _exactly_ the same as compression buffer size, + * and follows exactly same update rule (block boundaries at same positions), + * and decoding function is provided with exact decompressed size of each block (exception for last block of the stream), + * _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB). + * - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes. + * In which case, encoding and decoding buffers do not need to be synchronized, + * and encoding ring buffer can have any size, including small ones ( < 64 KB). + * + * Whenever these conditions are not possible, + * save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression, + * then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block. +*/ +LZ4LIB_API int +LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, + const char* src, char* dst, + int srcSize, int dstCapacity); + + +/*! LZ4_decompress_safe_usingDict() : + * Works the same as + * a combination of LZ4_setStreamDecode() followed by LZ4_decompress_safe_continue() + * However, it's stateless: it doesn't need any LZ4_streamDecode_t state. + * Dictionary is presumed stable : it must remain accessible and unmodified during decompression. + * Performance tip : Decompression speed can be substantially increased + * when dst == dictStart + dictSize. + */ +LZ4LIB_API int +LZ4_decompress_safe_usingDict(const char* src, char* dst, + int srcSize, int dstCapacity, + const char* dictStart, int dictSize); + +/*! LZ4_decompress_safe_partial_usingDict() : + * Behaves the same as LZ4_decompress_safe_partial() + * with the added ability to specify a memory segment for past data. + * Performance tip : Decompression speed can be substantially increased + * when dst == dictStart + dictSize. + */ +LZ4LIB_API int +LZ4_decompress_safe_partial_usingDict(const char* src, char* dst, + int compressedSize, + int targetOutputSize, int maxOutputSize, + const char* dictStart, int dictSize); + +#endif /* LZ4_H_2983827168210 */ + + +/*^************************************* + * !!!!!! STATIC LINKING ONLY !!!!!! + ***************************************/ + +/*-**************************************************************************** + * Experimental section + * + * Symbols declared in this section must be considered unstable. Their + * signatures or semantics may change, or they may be removed altogether in the + * future. They are therefore only safe to depend on when the caller is + * statically linked against the library. + * + * To protect against unsafe usage, not only are the declarations guarded, + * the definitions are hidden by default + * when building LZ4 as a shared/dynamic library. + * + * In order to access these declarations, + * define LZ4_STATIC_LINKING_ONLY in your application + * before including LZ4's headers. + * + * In order to make their implementations accessible dynamically, you must + * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library. + ******************************************************************************/ + +#ifdef LZ4_STATIC_LINKING_ONLY + +#ifndef LZ4_STATIC_3504398509 +#define LZ4_STATIC_3504398509 + +#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS +# define LZ4LIB_STATIC_API LZ4LIB_API +#else +# define LZ4LIB_STATIC_API +#endif + + +/*! LZ4_compress_fast_extState_fastReset() : + * A variant of LZ4_compress_fast_extState(). + * + * Using this variant avoids an expensive initialization step. + * It is only safe to call if the state buffer is known to be correctly initialized already + * (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized"). + * From a high level, the difference is that + * this function initializes the provided state with a call to something like LZ4_resetStream_fast() + * while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream(). + */ +LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + +/*! LZ4_compress_destSize_extState() : introduced in v1.10.0 + * Same as LZ4_compress_destSize(), but using an externally allocated state. + * Also: exposes @acceleration + */ +int LZ4_compress_destSize_extState(void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration); + +/*! In-place compression and decompression + * + * It's possible to have input and output sharing the same buffer, + * for highly constrained memory environments. + * In both cases, it requires input to lay at the end of the buffer, + * and decompression to start at beginning of the buffer. + * Buffer size must feature some margin, hence be larger than final size. + * + * |<------------------------buffer--------------------------------->| + * |<-----------compressed data--------->| + * |<-----------decompressed size------------------>| + * |<----margin---->| + * + * This technique is more useful for decompression, + * since decompressed size is typically larger, + * and margin is short. + * + * In-place decompression will work inside any buffer + * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize). + * This presumes that decompressedSize > compressedSize. + * Otherwise, it means compression actually expanded data, + * and it would be more efficient to store such data with a flag indicating it's not compressed. + * This can happen when data is not compressible (already compressed, or encrypted). + * + * For in-place compression, margin is larger, as it must be able to cope with both + * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX, + * and data expansion, which can happen when input is not compressible. + * As a consequence, buffer size requirements are much higher, + * and memory savings offered by in-place compression are more limited. + * + * There are ways to limit this cost for compression : + * - Reduce history size, by modifying LZ4_DISTANCE_MAX. + * Note that it is a compile-time constant, so all compressions will apply this limit. + * Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX, + * so it's a reasonable trick when inputs are known to be small. + * - Require the compressor to deliver a "maximum compressed size". + * This is the `dstCapacity` parameter in `LZ4_compress*()`. + * When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail, + * in which case, the return code will be 0 (zero). + * The caller must be ready for these cases to happen, + * and typically design a backup scheme to send data uncompressed. + * The combination of both techniques can significantly reduce + * the amount of margin required for in-place compression. + * + * In-place compression can work in any buffer + * which size is >= (maxCompressedSize) + * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success. + * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX, + * so it's possible to reduce memory requirements by playing with them. + */ + +#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32) +#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */ + +#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */ +# define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */ +#endif + +#define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */ +#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */ + +#endif /* LZ4_STATIC_3504398509 */ +#endif /* LZ4_STATIC_LINKING_ONLY */ + + + +#ifndef LZ4_H_98237428734687 +#define LZ4_H_98237428734687 + +/*-************************************************************ + * Private Definitions + ************************************************************** + * Do not use these definitions directly. + * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`. + * Accessing members will expose user code to API and/or ABI break in future versions of the library. + **************************************************************/ +#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2) +#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE) +#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */ + +#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# include + typedef int8_t LZ4_i8; + typedef unsigned char LZ4_byte; + typedef uint16_t LZ4_u16; + typedef uint32_t LZ4_u32; +#else + typedef signed char LZ4_i8; + typedef unsigned char LZ4_byte; + typedef unsigned short LZ4_u16; + typedef unsigned int LZ4_u32; +#endif + +/*! LZ4_stream_t : + * Never ever use below internal definitions directly ! + * These definitions are not API/ABI safe, and may change in future versions. + * If you need static allocation, declare or allocate an LZ4_stream_t object. +**/ + +typedef struct LZ4_stream_t_internal LZ4_stream_t_internal; +struct LZ4_stream_t_internal { + LZ4_u32 hashTable[LZ4_HASH_SIZE_U32]; + const LZ4_byte* dictionary; + const LZ4_stream_t_internal* dictCtx; + LZ4_u32 currentOffset; + LZ4_u32 tableType; + LZ4_u32 dictSize; + /* Implicit padding to ensure structure is aligned */ +}; + +#define LZ4_STREAM_MINSIZE ((1UL << (LZ4_MEMORY_USAGE)) + 32) /* static size, for inter-version compatibility */ +union LZ4_stream_u { + char minStateSize[LZ4_STREAM_MINSIZE]; + LZ4_stream_t_internal internal_donotuse; +}; /* previously typedef'd to LZ4_stream_t */ + + +/*! LZ4_initStream() : v1.9.0+ + * An LZ4_stream_t structure must be initialized at least once. + * This is automatically done when invoking LZ4_createStream(), + * but it's not when the structure is simply declared on stack (for example). + * + * Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t. + * It can also initialize any arbitrary buffer of sufficient size, + * and will @return a pointer of proper type upon initialization. + * + * Note : initialization fails if size and alignment conditions are not respected. + * In which case, the function will @return NULL. + * Note2: An LZ4_stream_t structure guarantees correct alignment and size. + * Note3: Before v1.9.0, use LZ4_resetStream() instead +**/ +LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* stateBuffer, size_t size); + + +/*! LZ4_streamDecode_t : + * Never ever use below internal definitions directly ! + * These definitions are not API/ABI safe, and may change in future versions. + * If you need static allocation, declare or allocate an LZ4_streamDecode_t object. +**/ +typedef struct { + const LZ4_byte* externalDict; + const LZ4_byte* prefixEnd; + size_t extDictSize; + size_t prefixSize; +} LZ4_streamDecode_t_internal; + +#define LZ4_STREAMDECODE_MINSIZE 32 +union LZ4_streamDecode_u { + char minStateSize[LZ4_STREAMDECODE_MINSIZE]; + LZ4_streamDecode_t_internal internal_donotuse; +} ; /* previously typedef'd to LZ4_streamDecode_t */ + + + +/*-************************************ +* Obsolete Functions +**************************************/ + +/*! Deprecation warnings + * + * Deprecated functions make the compiler generate a warning when invoked. + * This is meant to invite users to update their source code. + * Should deprecation warnings be a problem, it is generally possible to disable them, + * typically with -Wno-deprecated-declarations for gcc + * or _CRT_SECURE_NO_WARNINGS in Visual. + * + * Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS + * before including the header file. + */ +#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS +# define LZ4_DEPRECATED(message) /* disable deprecation warnings */ +#else +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define LZ4_DEPRECATED(message) [[deprecated(message)]] +# elif defined(_MSC_VER) +# define LZ4_DEPRECATED(message) __declspec(deprecated(message)) +# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45)) +# define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) +# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31) +# define LZ4_DEPRECATED(message) __attribute__((deprecated)) +# else +# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler") +# define LZ4_DEPRECATED(message) /* disabled */ +# endif +#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */ + +/*! Obsolete compression functions (since v1.7.3) */ +LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize); +LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize); + +/*! Obsolete decompression functions (since v1.8.0) */ +LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize); +LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); + +/* Obsolete streaming functions (since v1.7.0) + * degraded functionality; do not use! + * + * In order to perform streaming compression, these functions depended on data + * that is no longer tracked in the state. They have been preserved as well as + * possible: using them will still produce a correct output. However, they don't + * actually retain any history between compression calls. The compression ratio + * achieved will therefore be no better than compressing each chunk + * independently. + */ +LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer); +LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStreamState(void); +LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer); +LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state); + +/*! Obsolete streaming decoding functions (since v1.7.0) */ +LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize); +LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize); + +/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) : + * These functions used to be faster than LZ4_decompress_safe(), + * but this is no longer the case. They are now slower. + * This is because LZ4_decompress_fast() doesn't know the input size, + * and therefore must progress more cautiously into the input buffer to not read beyond the end of block. + * On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability. + * As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated. + * + * The last remaining LZ4_decompress_fast() specificity is that + * it can decompress a block without knowing its compressed size. + * Such functionality can be achieved in a more secure manner + * by employing LZ4_decompress_safe_partial(). + * + * Parameters: + * originalSize : is the uncompressed size to regenerate. + * `dst` must be already allocated, its size must be >= 'originalSize' bytes. + * @return : number of bytes read from source buffer (== compressed size). + * The function expects to finish at block's end exactly. + * If the source stream is detected malformed, the function stops decoding and returns a negative result. + * note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer. + * However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds. + * Also, since match offsets are not validated, match reads from 'src' may underflow too. + * These issues never happen if input (compressed) data is correct. + * But they may happen if input data is invalid (error or intentional tampering). + * As a consequence, use these functions in trusted environments with trusted data **only**. + */ +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_partial() instead") +LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider migrating towards LZ4_decompress_safe_continue() instead. " + "Note that the contract will change (requires block's compressed size, instead of decompressed size)") +LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_partial_usingDict() instead") +LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize); + +/*! LZ4_resetStream() : + * An LZ4_stream_t structure must be initialized at least once. + * This is done with LZ4_initStream(), or LZ4_resetStream(). + * Consider switching to LZ4_initStream(), + * invoking LZ4_resetStream() will trigger deprecation warnings in the future. + */ +LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr); + + +#endif /* LZ4_H_98237428734687 */ + + +#if defined (__cplusplus) +} +#endif diff --git a/lz4/xxhash.c b/lz4/xxhash.c new file mode 100644 index 00000000..e1326196 --- /dev/null +++ b/lz4/xxhash.c @@ -0,0 +1,1030 @@ +/* +* xxHash - Fast Hash algorithm +* Copyright (C) 2012-2016, Yann Collet +* +* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following disclaimer +* in the documentation and/or other materials provided with the +* distribution. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* You can contact the author at : +* - xxHash homepage: http://www.xxhash.com +* - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + + +/* ************************************* +* Tuning parameters +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. + * It can generate buggy code on targets which do not support unaligned memory accesses. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ + || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define XXH_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7S__) )) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/*!XXH_ACCEPT_NULL_INPUT_POINTER : + * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a segfault. + * When this macro is enabled, xxHash actively checks input for null pointer. + * It it is, result for null input pointers is the same as a null-length input. + */ +#ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */ +# define XXH_ACCEPT_NULL_INPUT_POINTER 0 +#endif + +/*!XXH_FORCE_NATIVE_FORMAT : + * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. + * Results are therefore identical for little-endian and big-endian CPU. + * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. + * Should endian-independence be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. + * This option has no impact on Little_Endian CPU. + */ +#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ +# define XXH_FORCE_NATIVE_FORMAT 0 +#endif + +/*!XXH_FORCE_ALIGN_CHECK : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : check for aligned/unaligned input. + * The check costs one initial branch per hash; + * set it to 0 when the input is guaranteed to be aligned, + * or when alignment doesn't matter for performance. + */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_FORCE_ALIGN_CHECK 0 +# else +# define XXH_FORCE_ALIGN_CHECK 1 +# endif +#endif + + +/* ************************************* +* Includes & Memory related functions +***************************************/ +/*! Modify the local functions below should you wish to use some other memory routines +* for malloc(), free() */ +#include +static void* XXH_malloc(size_t s) { return malloc(s); } +static void XXH_free (void* p) { free(p); } +/*! and for memcpy() */ +#include +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } + +#include /* assert */ + +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" + + +/* ************************************* +* Compiler Specific Options +***************************************/ +#if defined (_MSC_VER) && !defined (__clang__) /* MSVC */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# define FORCE_INLINE static __forceinline +#else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# if defined (__GNUC__) || defined (__clang__) +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +# else +# define FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +#endif + + +/* ************************************* +* Basic Types +***************************************/ +#ifndef MEM_MODULE +# if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; +# else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; +# endif +#endif + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; } __attribute__((packed)) unalign; +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } + +#else + +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ +static U32 XXH_read32(const void* memPtr) +{ + U32 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +#elif XXH_GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +#else +static U32 XXH_swap32 (U32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +#endif + + +/* ************************************* +* Architecture Macros +***************************************/ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianness; + +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ +#ifndef XXH_CPU_LITTLE_ENDIAN +static int XXH_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} +# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian() +#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianness endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); + else + return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); +} + +FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianness endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +static U32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} + + +/* ************************************* +* Macros +***************************************/ +#define XXH_STATIC_ASSERT(c) { enum { XXH_sa = 1/(int)(!!(c)) }; } /* use after variable declarations */ +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* ******************************************************************* +* 32-bit hash functions +*********************************************************************/ +static const U32 PRIME32_1 = 2654435761U; +static const U32 PRIME32_2 = 2246822519U; +static const U32 PRIME32_3 = 3266489917U; +static const U32 PRIME32_4 = 668265263U; +static const U32 PRIME32_5 = 374761393U; + +static U32 XXH32_round(U32 seed, U32 input) +{ + seed += input * PRIME32_2; + seed = XXH_rotl32(seed, 13); + seed *= PRIME32_1; + return seed; +} + +/* mix all bits */ +static U32 XXH32_avalanche(U32 h32) +{ + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + return(h32); +} + +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +static U32 +XXH32_finalize(U32 h32, const void* ptr, size_t len, + XXH_endianness endian, XXH_alignment align) + +{ + const BYTE* p = (const BYTE*)ptr; + +#define PROCESS1 \ + h32 += (*p++) * PRIME32_5; \ + h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; + +#define PROCESS4 \ + h32 += XXH_get32bits(p) * PRIME32_3; \ + p+=4; \ + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + + switch(len&15) /* or switch(bEnd - p) */ + { + case 12: PROCESS4; + /* fallthrough */ + case 8: PROCESS4; + /* fallthrough */ + case 4: PROCESS4; + return XXH32_avalanche(h32); + + case 13: PROCESS4; + /* fallthrough */ + case 9: PROCESS4; + /* fallthrough */ + case 5: PROCESS4; + PROCESS1; + return XXH32_avalanche(h32); + + case 14: PROCESS4; + /* fallthrough */ + case 10: PROCESS4; + /* fallthrough */ + case 6: PROCESS4; + PROCESS1; + PROCESS1; + return XXH32_avalanche(h32); + + case 15: PROCESS4; + /* fallthrough */ + case 11: PROCESS4; + /* fallthrough */ + case 7: PROCESS4; + /* fallthrough */ + case 3: PROCESS1; + /* fallthrough */ + case 2: PROCESS1; + /* fallthrough */ + case 1: PROCESS1; + /* fallthrough */ + case 0: return XXH32_avalanche(h32); + } + assert(0); + return h32; /* reaching this point is deemed impossible */ +} + + +FORCE_INLINE U32 +XXH32_endian_align(const void* input, size_t len, U32 seed, + XXH_endianness endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U32 h32; + +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)16; + } +#endif + + if (len>=16) { + const BYTE* const limit = bEnd - 15; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; + v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; + v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; + v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; + } while (p < limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + PRIME32_5; + } + + h32 += (U32)len; + + return XXH32_finalize(h32, p, len&15, endian, align); +} + + +XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_state_t state; + XXH32_reset(&state, seed); + XXH32_update(&state, input, len); + return XXH32_digest(&state); +#else + XXH_endianness endian_detected = (XXH_endianness)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + + +/*====== Hash streaming ======*/ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + /* do not write into reserved, planned to be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); + return XXH_OK; +} + + +FORCE_INLINE XXH_errorcode +XXH32_update_endian(XXH32_state_t* state, const void* input, size_t len, XXH_endianness endian) +{ + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; +#endif + + { const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + + state->total_len_32 += (unsigned)len; + state->large_len |= (len>=16) | (state->total_len_32>=16); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); + state->memsize += (unsigned)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const U32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do { + v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + } + + return XXH_OK; +} + + +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) +{ + XXH_endianness endian_detected = (XXH_endianness)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} + + +FORCE_INLINE U32 +XXH32_digest_endian (const XXH32_state_t* state, XXH_endianness endian) +{ + U32 h32; + + if (state->large_len) { + h32 = XXH_rotl32(state->v1, 1) + + XXH_rotl32(state->v2, 7) + + XXH_rotl32(state->v3, 12) + + XXH_rotl32(state->v4, 18); + } else { + h32 = state->v3 /* == seed */ + PRIME32_5; + } + + h32 += state->total_len_32; + + return XXH32_finalize(h32, state->mem32, state->memsize, endian, XXH_aligned); +} + + +XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) +{ + XXH_endianness endian_detected = (XXH_endianness)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_digest_endian(state_in, XXH_littleEndian); + else + return XXH32_digest_endian(state_in, XXH_bigEndian); +} + + +/*====== Canonical representation ======*/ + +/*! Default XXH result types are basic unsigned 32 and 64 bits. +* The canonical representation follows human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file or buffer, remaining comparable across different systems. +*/ + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + + +#ifndef XXH_NO_LONG_LONG + +/* ******************************************************************* +* 64-bit hash functions +*********************************************************************/ + +/*====== Memory access ======*/ + +#ifndef MEM_MODULE +# define MEM_MODULE +# if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + typedef uint64_t U64; +# else + /* if compiler doesn't support unsigned long long, replace by another 64-bit type */ + typedef unsigned long long U64; +# endif +#endif + + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64; +static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ + +static U64 XXH_read64(const void* memPtr) +{ + U64 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap64 _byteswap_uint64 +#elif XXH_GCC_VERSION >= 403 +# define XXH_swap64 __builtin_bswap64 +#else +static U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + +FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianness endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); + else + return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianness endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + +static U64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + + +/*====== xxh64 ======*/ + +static const U64 PRIME64_1 = 11400714785074694791ULL; +static const U64 PRIME64_2 = 14029467366897019727ULL; +static const U64 PRIME64_3 = 1609587929392839161ULL; +static const U64 PRIME64_4 = 9650029242287828579ULL; +static const U64 PRIME64_5 = 2870177450012600261ULL; + +static U64 XXH64_round(U64 acc, U64 input) +{ + acc += input * PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= PRIME64_1; + return acc; +} + +static U64 XXH64_mergeRound(U64 acc, U64 val) +{ + val = XXH64_round(0, val); + acc ^= val; + acc = acc * PRIME64_1 + PRIME64_4; + return acc; +} + +static U64 XXH64_avalanche(U64 h64) +{ + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + return h64; +} + + +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +static U64 +XXH64_finalize(U64 h64, const void* ptr, size_t len, + XXH_endianness endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)ptr; + +#define PROCESS1_64 \ + h64 ^= (*p++) * PRIME64_5; \ + h64 = XXH_rotl64(h64, 11) * PRIME64_1; + +#define PROCESS4_64 \ + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; \ + p+=4; \ + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + +#define PROCESS8_64 { \ + U64 const k1 = XXH64_round(0, XXH_get64bits(p)); \ + p+=8; \ + h64 ^= k1; \ + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \ +} + + switch(len&31) { + case 24: PROCESS8_64; + /* fallthrough */ + case 16: PROCESS8_64; + /* fallthrough */ + case 8: PROCESS8_64; + return XXH64_avalanche(h64); + + case 28: PROCESS8_64; + /* fallthrough */ + case 20: PROCESS8_64; + /* fallthrough */ + case 12: PROCESS8_64; + /* fallthrough */ + case 4: PROCESS4_64; + return XXH64_avalanche(h64); + + case 25: PROCESS8_64; + /* fallthrough */ + case 17: PROCESS8_64; + /* fallthrough */ + case 9: PROCESS8_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 29: PROCESS8_64; + /* fallthrough */ + case 21: PROCESS8_64; + /* fallthrough */ + case 13: PROCESS8_64; + /* fallthrough */ + case 5: PROCESS4_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 26: PROCESS8_64; + /* fallthrough */ + case 18: PROCESS8_64; + /* fallthrough */ + case 10: PROCESS8_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 30: PROCESS8_64; + /* fallthrough */ + case 22: PROCESS8_64; + /* fallthrough */ + case 14: PROCESS8_64; + /* fallthrough */ + case 6: PROCESS4_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 27: PROCESS8_64; + /* fallthrough */ + case 19: PROCESS8_64; + /* fallthrough */ + case 11: PROCESS8_64; + PROCESS1_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 31: PROCESS8_64; + /* fallthrough */ + case 23: PROCESS8_64; + /* fallthrough */ + case 15: PROCESS8_64; + /* fallthrough */ + case 7: PROCESS4_64; + /* fallthrough */ + case 3: PROCESS1_64; + /* fallthrough */ + case 2: PROCESS1_64; + /* fallthrough */ + case 1: PROCESS1_64; + /* fallthrough */ + case 0: return XXH64_avalanche(h64); + } + + /* impossible to reach */ + assert(0); + return 0; /* unreachable, but some compilers complain without it */ +} + +FORCE_INLINE U64 +XXH64_endian_align(const void* input, size_t len, U64 seed, + XXH_endianness endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U64 h64; + +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; + v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; + v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; + v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; + } while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + return XXH64_finalize(h64, p, len, endian, align); +} + + +XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_state_t state; + XXH64_reset(&state, seed); + XXH64_update(&state, input, len); + return XXH64_digest(&state); +#else + XXH_endianness endian_detected = (XXH_endianness)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + +/*====== Hash Streaming ======*/ + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) +{ + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + /* do not write into reserved, planned to be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); + return XXH_OK; +} + +FORCE_INLINE XXH_errorcode +XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianness endian) +{ + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; +#endif + + { const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + + state->total_len += len; + + if (state->memsize + len < 32) { /* fill in tmp buffer */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + state->memsize += (U32)len; + return XXH_OK; + } + + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do { + v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) +{ + XXH_endianness endian_detected = (XXH_endianness)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} + +FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianness endian) +{ + U64 h64; + + if (state->total_len >= 32) { + U64 const v1 = state->v1; + U64 const v2 = state->v2; + U64 const v3 = state->v3; + U64 const v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else { + h64 = state->v3 /*seed*/ + PRIME64_5; + } + + h64 += (U64) state->total_len; + + return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, endian, XXH_aligned); +} + +XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) +{ + XXH_endianness endian_detected = (XXH_endianness)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_digest_endian(state_in, XXH_littleEndian); + else + return XXH64_digest_endian(state_in, XXH_bigEndian); +} + + +/*====== Canonical representation ======*/ + +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} + +#endif /* XXH_NO_LONG_LONG */ diff --git a/lz4/xxhash.h b/lz4/xxhash.h new file mode 100644 index 00000000..d6bad943 --- /dev/null +++ b/lz4/xxhash.h @@ -0,0 +1,328 @@ +/* + xxHash - Extremely Fast Hash algorithm + Header File + Copyright (C) 2012-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. + +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MumurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. + +A 64-bit version, named XXH64, is available since r35. +It offers much better speed, but for 64-bit applications only. +Name Speed on 64 bits Speed on 32 bits +XXH64 13.8 GB/s 1.9 GB/s +XXH32 6.8 GB/s 6.0 GB/s +*/ + +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* **************************** +* Definitions +******************************/ +#include /* size_t */ +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + +/* **************************** + * API modifier + ******************************/ +/** XXH_INLINE_ALL (and XXH_PRIVATE_API) + * This is useful to include xxhash functions in `static` mode + * in order to inline them, and remove their symbol from the public list. + * Inlining can offer dramatic performance improvement on small keys. + * Methodology : + * #define XXH_INLINE_ALL + * #include "xxhash.h" + * `xxhash.c` is automatically included. + * It's not useful to compile and link it as a separate module. + */ +#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) +# ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +# endif +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else + /* this version may generate warnings for unused static functions */ +# define XXH_PUBLIC_API static +# endif +#else +# define XXH_PUBLIC_API /* do nothing */ +#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */ + +/*! XXH_NAMESPACE, aka Namespace Emulation : + * + * If you want to include _and expose_ xxHash functions from within your own library, + * but also want to avoid symbol collisions with other libraries which may also include xxHash, + * + * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library + * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values). + * + * Note that no change is required within the calling program as long as it includes `xxhash.h` : + * regular symbol name will be automatically translated by this header. + */ +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +#endif + + +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 6 +#define XXH_VERSION_RELEASE 5 +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +XXH_PUBLIC_API unsigned XXH_versionNumber (void); + + +/*-********************************************************************** +* 32-bit hash +************************************************************************/ +typedef unsigned int XXH32_hash_t; + +/*! XXH32() : + Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input". + The memory between input & input+length must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */ +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); + +/*====== Streaming ======*/ +typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state); + +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); + +/* + * Streaming functions generate the xxHash of an input provided in multiple segments. + * Note that, for small input, they are slower than single-call functions, due to state management. + * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized. + * + * XXH state must first be allocated, using XXH*_createState() . + * + * Start a new hash by initializing state with a seed, using XXH*_reset(). + * + * Then, feed the hash state by calling XXH*_update() as many times as necessary. + * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + * + * Finally, a hash value can be produced anytime, by using XXH*_digest(). + * This function returns the nn-bits hash as an int or long long. + * + * It's still possible to continue inserting input into the hash state after a digest, + * and generate some new hashes later on, by calling again XXH*_digest(). + * + * When done, free XXH state space if it was allocated dynamically. + */ + +/*====== Canonical representation ======*/ + +typedef struct { unsigned char digest[4]; } XXH32_canonical_t; +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); + +/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. + * The canonical representation uses human-readable write convention, aka big-endian (large digits first). + * These functions allow transformation of hash result into and from its canonical format. + * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. + */ + + +#ifndef XXH_NO_LONG_LONG +/*-********************************************************************** +* 64-bit hash +************************************************************************/ +typedef unsigned long long XXH64_hash_t; + +/*! XXH64() : + Calculate the 64-bit hash of sequence of length "len" stored at memory address "input". + "seed" can be used to alter the result predictably. + This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark). +*/ +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); + +/*====== Streaming ======*/ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/*====== Canonical representation ======*/ +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); +#endif /* XXH_NO_LONG_LONG */ + + + +#ifdef XXH_STATIC_LINKING_ONLY + +/* ================================================================================================ + This section contains declarations which are not guaranteed to remain stable. + They may change in future versions, becoming incompatible with a different version of the library. + These declarations should only be used with static linking. + Never use them in association with dynamic linking ! +=================================================================================================== */ + +/* These definitions are only present to allow + * static allocation of XXH state, on stack or in a struct for example. + * Never **ever** use members directly. */ + +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + +struct XXH32_state_s { + uint32_t total_len_32; + uint32_t large_len; + uint32_t v1; + uint32_t v2; + uint32_t v3; + uint32_t v4; + uint32_t mem32[4]; + uint32_t memsize; + uint32_t reserved; /* never read nor write, might be removed in a future version */ +}; /* typedef'd to XXH32_state_t */ + +struct XXH64_state_s { + uint64_t total_len; + uint64_t v1; + uint64_t v2; + uint64_t v3; + uint64_t v4; + uint64_t mem64[4]; + uint32_t memsize; + uint32_t reserved[2]; /* never read nor write, might be removed in a future version */ +}; /* typedef'd to XXH64_state_t */ + +# else + +struct XXH32_state_s { + unsigned total_len_32; + unsigned large_len; + unsigned v1; + unsigned v2; + unsigned v3; + unsigned v4; + unsigned mem32[4]; + unsigned memsize; + unsigned reserved; /* never read nor write, might be removed in a future version */ +}; /* typedef'd to XXH32_state_t */ + +# ifndef XXH_NO_LONG_LONG /* remove 64-bit support */ +struct XXH64_state_s { + unsigned long long total_len; + unsigned long long v1; + unsigned long long v2; + unsigned long long v3; + unsigned long long v4; + unsigned long long mem64[4]; + unsigned memsize; + unsigned reserved[2]; /* never read nor write, might be removed in a future version */ +}; /* typedef'd to XXH64_state_t */ +# endif + +# endif + + +#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) +# include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */ +#endif + +#endif /* XXH_STATIC_LINKING_ONLY */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* XXHASH_H_5627135585666179 */ diff --git a/minutor.pro b/minutor.pro index e2286340..8641f678 100644 --- a/minutor.pro +++ b/minutor.pro @@ -32,6 +32,8 @@ HEADERS += \ identifier/entityidentifier.h \ identifier/flatteningconverter.h \ jumpto.h \ + lz4/lz4.h \ + lz4/xxhash.h \ mapview.h \ minutor.h \ nbt/nbt.h \ @@ -79,6 +81,8 @@ SOURCES += \ identifier/entityidentifier.cpp \ identifier/flatteningconverter.cpp \ jumpto.cpp \ + lz4/lz4.c \ + lz4/xxhash.c \ main.cpp \ mapview.cpp \ minutor.cpp \ @@ -136,7 +140,7 @@ SOURCES += \ zlib/uncompr.c \ zlib/zutil.c -INCLUDEPATH += identifer json nbt overlay search zlib +INCLUDEPATH += identifer json nbt overlay search zlib lz4 } desktopfile.path = /usr/share/applications From ede9bcdef11e2a14b820b353a5a496b8b31b109c Mon Sep 17 00:00:00 2001 From: EtlamGit Date: Sat, 27 Dec 2025 23:55:27 +0100 Subject: [PATCH 3/5] add Chunk decompressor for LZ4 --- chunkloader.cpp | 4 +-- nbt/nbt.cpp | 76 +++++++++++++++++++++++++++++++++++++++++++++++++ nbt/nbt.h | 1 + 3 files changed, 79 insertions(+), 2 deletions(-) diff --git a/chunkloader.cpp b/chunkloader.cpp index fb0f95f7..8cb281ee 100644 --- a/chunkloader.cpp +++ b/chunkloader.cpp @@ -85,8 +85,8 @@ bool ChunkLoader::loadNbtHelper(QString filename, int cx, int cz, QSharedPointer // Read chunk header to get actual data length f.seek(chunkStart); - char headerBuf[5]; - if (f.read(headerBuf, 5) != 5) { + char headerBuf[4]; + if (f.read(headerBuf, 4) != 4) { f.close(); return false; } diff --git a/nbt/nbt.cpp b/nbt/nbt.cpp index 99275fb8..a6f42266 100644 --- a/nbt/nbt.cpp +++ b/nbt/nbt.cpp @@ -4,6 +4,7 @@ #include #include "nbt/nbt.h" +#include "lz4/lz4.h" // this handles decoding the gzipped level.dat @@ -29,6 +30,8 @@ NBT::NBT(const uchar *chunk) // supported compression formats if (chunk[4] == 1) unpack_zlib(data, length - 1, 15 + 16); // rfc1952 not used by official Minecraft if (chunk[4] == 2) unpack_zlib(data, length - 1, 15 + 0); // rfc1950 default for all Chunk data + if (chunk[4] == 3) {;} + if (chunk[4] == 4) unpack_lz4(data, length - 1); // LZ4 compression } Tag NBT::Null; @@ -69,6 +72,79 @@ void NBT::unpack_zlib(const unsigned char * data, unsigned long length, int wind } } + +// Minecraft uses an incompatible Java implementation of LZ4 +// -> so we have to decode magic headers by ourself +static const char LZ4_MAGIC[] = {'L', 'Z', '4', 'B', 'l', 'o', 'c', 'k'}; +static const int LZ4_MAGIC_LENGTH = sizeof(LZ4_MAGIC); + +static const int LZ4_COMPRESSION_METHOD_RAW = 0x10; +static const int LZ4_COMPRESSION_METHOD_LZ4 = 0x20; +static const int LZ4_COMPRESSION_LEVEL_BASE = 10; +static const int LZ4_DEFAULT_SEED = 0x9747b28c; + +// read an int from Little Endian byte stream +long readIntLE(const unsigned char * data) { + return (data[3] << 24) | (data[2] << 16) | (data[1] << 8) | (data[0]); +} + +void NBT::unpack_lz4(const unsigned char * data, unsigned long length) { + if (length < LZ4_MAGIC_LENGTH+13) return; + + QByteArray nbt; + + const unsigned char * input = data; + + while ((input - data) < length) { + // decode LZ4-Java block header + for (int m=0; m length) return; + + if (compression_method == LZ4_COMPRESSION_METHOD_RAW) { + // copy RAW block + nbt.append(reinterpret_cast(input), original_length); + } else { + // decompress one block + char * output = reinterpret_cast(malloc(original_length)); + int ret = LZ4_decompress_safe(reinterpret_cast(input), output, + compressed_length, original_length); + nbt.append(output, ret); + free(output); + } + // advance input data pointer + input += compressed_length; + } + + TagDataStream s(nbt.constData(), nbt.size()); + + if (s.r8() == Tag::TAG_COMPOUND) { // outer compound is expected + s.skip(s.r16()); // skip name (should be empty anyways) + root = new Tag_Compound(&s); + } +} + + bool NBT::has(const QString key) const { return root->has(key); } diff --git a/nbt/nbt.h b/nbt/nbt.h index 35287b26..62c1bc27 100644 --- a/nbt/nbt.h +++ b/nbt/nbt.h @@ -20,6 +20,7 @@ class NBT { private: void unpack_zlib(const unsigned char * data, unsigned long length, int windowsize = 15); + void unpack_lz4(const unsigned char * data, unsigned long length); Tag * root; }; From bfef9765a00287051a86b7e724064a8f17f71f6f Mon Sep 17 00:00:00 2001 From: EtlamGit Date: Sun, 28 Dec 2025 14:24:27 +0100 Subject: [PATCH 4/5] support for uncompressed Chunks "format 3" uncompressed means that we can just copy the NBT data --- nbt/nbt.cpp | 20 +++++++++++--------- nbt/nbt.h | 2 ++ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/nbt/nbt.cpp b/nbt/nbt.cpp index a6f42266..1171bf8c 100644 --- a/nbt/nbt.cpp +++ b/nbt/nbt.cpp @@ -30,7 +30,7 @@ NBT::NBT(const uchar *chunk) // supported compression formats if (chunk[4] == 1) unpack_zlib(data, length - 1, 15 + 16); // rfc1952 not used by official Minecraft if (chunk[4] == 2) unpack_zlib(data, length - 1, 15 + 0); // rfc1950 default for all Chunk data - if (chunk[4] == 3) {;} + if (chunk[4] == 3) decode_nbt(data, length - 1); // copy uncompressed data if (chunk[4] == 4) unpack_lz4(data, length - 1); // LZ4 compression } @@ -64,12 +64,7 @@ void NBT::unpack_zlib(const unsigned char * data, unsigned long length, int wind } while (stream.avail_out == 0); inflateEnd(&stream); - TagDataStream s(nbt.constData(), nbt.size()); - - if (s.r8() == Tag::TAG_COMPOUND) { // outer compound is expected - s.skip(s.r16()); // skip name (should be empty anyways) - root = new Tag_Compound(&s); - } + decode_nbt(nbt.constData(), nbt.size()); } @@ -136,7 +131,15 @@ void NBT::unpack_lz4(const unsigned char * data, unsigned long length) { input += compressed_length; } - TagDataStream s(nbt.constData(), nbt.size()); + decode_nbt(nbt.constData(), nbt.size()); +} + +void NBT::decode_nbt(const unsigned char * data, unsigned long length) { + decode_nbt(reinterpret_cast(data), length); +} + +void NBT::decode_nbt(const char * data, unsigned long length) { + TagDataStream s(data, length); if (s.r8() == Tag::TAG_COMPOUND) { // outer compound is expected s.skip(s.r16()); // skip name (should be empty anyways) @@ -144,7 +147,6 @@ void NBT::unpack_lz4(const unsigned char * data, unsigned long length) { } } - bool NBT::has(const QString key) const { return root->has(key); } diff --git a/nbt/nbt.h b/nbt/nbt.h index 62c1bc27..76a6cc88 100644 --- a/nbt/nbt.h +++ b/nbt/nbt.h @@ -21,6 +21,8 @@ class NBT { private: void unpack_zlib(const unsigned char * data, unsigned long length, int windowsize = 15); void unpack_lz4(const unsigned char * data, unsigned long length); + void decode_nbt(const unsigned char * data, unsigned long length); + void decode_nbt(const char * data, unsigned long length); Tag * root; }; From b5220140dce70f5fe625900c83bd656d582e4e80 Mon Sep 17 00:00:00 2001 From: EtlamGit Date: Mon, 29 Dec 2025 13:22:57 +0100 Subject: [PATCH 5/5] use checksum to detect corrupt Chunk data --- nbt/nbt.cpp | 73 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 28 deletions(-) diff --git a/nbt/nbt.cpp b/nbt/nbt.cpp index 1171bf8c..b3e73a62 100644 --- a/nbt/nbt.cpp +++ b/nbt/nbt.cpp @@ -6,6 +6,9 @@ #include "nbt/nbt.h" #include "lz4/lz4.h" +#define XXH_INLINE_ALL +#include "lz4/xxhash.h" + // this handles decoding the gzipped level.dat NBT::NBT(const QString level) @@ -17,7 +20,8 @@ NBT::NBT(const QString level) f.close(); // level.dat is typically gzip format, but autodetect here - unpack_zlib(reinterpret_cast(data.data()), data.size(), 15 + 32); // +32 to autodetect gzip/zlib compression + // +32 to autodetect gzip/zlib compression + unpack_zlib(reinterpret_cast(data.data()), data.size(), 15 + 32); } // this handles decoding a compressed Chunk of a region file @@ -26,12 +30,14 @@ NBT::NBT(const uchar *chunk) { // find chunk size in first 4 bytes, format is fifth byte int length = (chunk[0] << 24) | (chunk[1] << 16) | (chunk[2] << 8) | chunk[3]; + length -= 1; // -1 byte for compression format const unsigned char * data = reinterpret_cast(chunk) + 5; // supported compression formats - if (chunk[4] == 1) unpack_zlib(data, length - 1, 15 + 16); // rfc1952 not used by official Minecraft - if (chunk[4] == 2) unpack_zlib(data, length - 1, 15 + 0); // rfc1950 default for all Chunk data - if (chunk[4] == 3) decode_nbt(data, length - 1); // copy uncompressed data - if (chunk[4] == 4) unpack_lz4(data, length - 1); // LZ4 compression + if (chunk[4] == 1) unpack_zlib(data, length, 15 + 16); // rfc1952 not used by official Minecraft + if (chunk[4] == 2) unpack_zlib(data, length, 15 + 0); // rfc1950 default for all Chunk data + if (chunk[4] == 3) decode_nbt(data, length); // copy uncompressed data + if (chunk[4] == 4) unpack_lz4(data, length); // LZ4 compression + // silent return with empty data in case of unsupported format } Tag NBT::Null; @@ -43,7 +49,7 @@ static const int CHUNK_SIZE = 8192; // +16 gzip data (RFC 1952) // +32 autodetect zlib/gzip from header void NBT::unpack_zlib(const unsigned char * data, unsigned long length, int windowsize) { - + // prepare zlib stream structure z_stream stream; stream.zalloc = Z_NULL; stream.zfree = Z_NULL; @@ -51,16 +57,18 @@ void NBT::unpack_zlib(const unsigned char * data, unsigned long length, int wind stream.avail_in = length; stream.next_in = const_cast(data); // yes we violate "const", but zlib will not change the input data - char out[CHUNK_SIZE]; + // intermediate buffer for decompression + char buffer[CHUNK_SIZE]; + // final buffer for decompressed NBT data QByteArray nbt; inflateInit2(&stream, windowsize); do { stream.avail_out = CHUNK_SIZE; - stream.next_out = reinterpret_cast(out); + stream.next_out = reinterpret_cast(buffer); inflate(&stream, Z_NO_FLUSH); - nbt.append(out, CHUNK_SIZE - stream.avail_out); + nbt.append(buffer, CHUNK_SIZE - stream.avail_out); } while (stream.avail_out == 0); inflateEnd(&stream); @@ -70,13 +78,15 @@ void NBT::unpack_zlib(const unsigned char * data, unsigned long length, int wind // Minecraft uses an incompatible Java implementation of LZ4 // -> so we have to decode magic headers by ourself +// data is compressed in series of blocks, each with a header +// and luckily blocks are independend from each other static const char LZ4_MAGIC[] = {'L', 'Z', '4', 'B', 'l', 'o', 'c', 'k'}; static const int LZ4_MAGIC_LENGTH = sizeof(LZ4_MAGIC); static const int LZ4_COMPRESSION_METHOD_RAW = 0x10; static const int LZ4_COMPRESSION_METHOD_LZ4 = 0x20; static const int LZ4_COMPRESSION_LEVEL_BASE = 10; -static const int LZ4_DEFAULT_SEED = 0x9747b28c; +static const unsigned int LZ4_DEFAULT_SEED = 0x9747b28c; // read an int from Little Endian byte stream long readIntLE(const unsigned char * data) { @@ -97,38 +107,45 @@ void NBT::unpack_lz4(const unsigned char * data, unsigned long length) { } const unsigned char token = input[LZ4_MAGIC_LENGTH]; const unsigned char compression_method = token & 0xF0; - const unsigned char compression_level = LZ4_COMPRESSION_LEVEL_BASE + (token & 0x0F); +// unsigned char compression_level = LZ4_COMPRESSION_LEVEL_BASE + (token & 0x0F); if ((compression_method != LZ4_COMPRESSION_METHOD_RAW) && (compression_method != LZ4_COMPRESSION_METHOD_LZ4)) return; - const long compressed_length = readIntLE(input + LZ4_MAGIC_LENGTH + 1); - const long original_length = readIntLE(input + LZ4_MAGIC_LENGTH + 5); - const long checksum = readIntLE(input + LZ4_MAGIC_LENGTH + 9); + const long length_compressed = readIntLE(input + LZ4_MAGIC_LENGTH + 1); + const long length_original = readIntLE(input + LZ4_MAGIC_LENGTH + 5); + const XXH32_hash_t checksum = readIntLE(input + LZ4_MAGIC_LENGTH + 9); input += LZ4_MAGIC_LENGTH + 13; // special block indicating "no more data" - if ((compressed_length == 0) && (original_length == 0)) break; + if ((length_compressed == 0) && (length_original == 0)) break; // error checks - if (compressed_length < 0) return; - if (original_length < 0) return; - if ((compressed_length == 0) && (original_length != 0)) return; - if ((original_length == 0) && (compressed_length != 0)) return; - if ((compression_method == LZ4_COMPRESSION_METHOD_RAW) && (original_length != compressed_length)) return; + if (length_compressed < 0) return; + if (length_original < 0) return; + if ((length_compressed == 0) && (length_original != 0)) return; + if ((length_original == 0) && (length_compressed != 0)) return; + if ((compression_method == LZ4_COMPRESSION_METHOD_RAW) && (length_original != length_compressed)) return; // input buffer overflow check - if (((input - data) + compressed_length) > length) return; + if (((input - data) + length_compressed) > length) return; + + XXH32_hash_t checksum1; if (compression_method == LZ4_COMPRESSION_METHOD_RAW) { // copy RAW block - nbt.append(reinterpret_cast(input), original_length); + nbt.append(reinterpret_cast(input), length_original); + checksum1 = XXH32(input, length_original, LZ4_DEFAULT_SEED); } else { // decompress one block - char * output = reinterpret_cast(malloc(original_length)); - int ret = LZ4_decompress_safe(reinterpret_cast(input), output, - compressed_length, original_length); - nbt.append(output, ret); - free(output); + char * buffer = reinterpret_cast(malloc(length_original)); + int len = LZ4_decompress_safe(reinterpret_cast(input), buffer, + length_compressed, length_original); + checksum1 = XXH32(buffer, length_original, LZ4_DEFAULT_SEED); + nbt.append(buffer, len); + free(buffer); } // advance input data pointer - input += compressed_length; + input += length_compressed; + // check for matching checksum + checksum1 &= 0x0fffffff; // why the hell we have to remove the uppermost 4 bits ?!? + if (checksum != checksum1) return; } decode_nbt(nbt.constData(), nbt.size());