From 066a0d9b5f78215ab660eb74636ea015cbb77ec2 Mon Sep 17 00:00:00 2001 From: Ray Date: Sun, 16 Nov 2025 09:01:16 +1100 Subject: [PATCH] Add raw LZ4 block format support for cross-language compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add lz4_compress_raw() and lz4_uncompress_raw() functions to enable byte-exact compatibility with Python lz4.block, Rust lz4_flex, and Go pierrec/lz4. Changes: - Add php_lz4_compress_raw() static function (bypasses 4-byte size header) - Add php_lz4_uncompress_raw() static function (requires max_size parameter) - Add ZEND_FUNCTION wrappers for both new functions - Add argument info structures for parameter validation - Register new functions in lz4_functions array API: - string lz4_compress_raw(string $data, int $level = 0) Compresses data with NO size header (raw LZ4 block) - string lz4_uncompress_raw(string $data, int $max_size) Decompresses raw LZ4 block (max_size required) Tests: - tests/raw_001.phpt - Basic roundtrip functionality - tests/raw_002.phpt - Python compatibility test vectors - tests/raw_003.phpt - Error handling validation - tests/raw_004.phpt - Compression level testing Validation: - All 4 new tests pass - All 14 existing tests pass (backward compatibility maintained) - Bidirectional cross-language compatibility verified: * Python → PHP decompression: 4/4 test vectors pass * PHP → Python decompression: 4/4 test vectors pass Resolves incompatibility with ByteStorage envelope format used by CacheKit Python/Rust implementations. --- lz4.c | 179 +++++++++++++++++++++++++++++++++++++++++++++ tests/raw_001.phpt | 26 +++++++ tests/raw_002.phpt | 33 +++++++++ tests/raw_003.phpt | 43 +++++++++++ tests/raw_004.phpt | 46 ++++++++++++ 5 files changed, 327 insertions(+) create mode 100644 tests/raw_001.phpt create mode 100644 tests/raw_002.phpt create mode 100644 tests/raw_003.phpt create mode 100644 tests/raw_004.phpt diff --git a/lz4.c b/lz4.c index 99fce2c..2f0b929 100644 --- a/lz4.c +++ b/lz4.c @@ -74,6 +74,8 @@ static ZEND_FUNCTION(lz4_compress); static ZEND_FUNCTION(lz4_uncompress); static ZEND_FUNCTION(lz4_compress_frame); static ZEND_FUNCTION(lz4_uncompress_frame); +static ZEND_FUNCTION(lz4_compress_raw); +static ZEND_FUNCTION(lz4_uncompress_raw); ZEND_BEGIN_ARG_INFO_EX(arginfo_lz4_compress, 0, 0, 1) ZEND_ARG_INFO(0, data) @@ -98,6 +100,16 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_lz4_uncompress_frame, 0, 0, 1) ZEND_ARG_INFO(0, data) ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_INFO_EX(arginfo_lz4_compress_raw, 0, 0, 1) + ZEND_ARG_INFO(0, data) + ZEND_ARG_INFO(0, level) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(arginfo_lz4_uncompress_raw, 0, 0, 2) + ZEND_ARG_INFO(0, data) + ZEND_ARG_INFO(0, max_size) +ZEND_END_ARG_INFO() + #if PHP_MAJOR_VERSION >= 7 && defined(HAVE_APCU_SUPPORT) static int APC_SERIALIZER_NAME(lz4)(APC_SERIALIZER_ARGS); static int APC_UNSERIALIZER_NAME(lz4)(APC_UNSERIALIZER_ARGS); @@ -108,6 +120,8 @@ static zend_function_entry lz4_functions[] = { ZEND_FE(lz4_uncompress, arginfo_lz4_uncompress) ZEND_FE(lz4_compress_frame, arginfo_lz4_compress_frame) ZEND_FE(lz4_uncompress_frame, arginfo_lz4_uncompress_frame) + ZEND_FE(lz4_compress_raw, arginfo_lz4_compress_raw) + ZEND_FE(lz4_uncompress_raw, arginfo_lz4_uncompress_raw) ZEND_FE_END }; @@ -285,6 +299,93 @@ static int php_lz4_uncompress(const char* in, const int in_len, return SUCCESS; } +/** + * Raw LZ4 block compression (no size header) + * Compatible with Python lz4.block, Rust lz4_flex, Go pierrec/lz4 + */ +static int php_lz4_compress_raw(char* in, const int in_len, + char** out, int* out_len, + const int level) +{ + int max_len; + + /* Calculate maximum compressed size (LZ4 worst-case bound) */ + max_len = LZ4_compressBound(in_len); + + /* Allocate output buffer (NO header space, just compressed data) */ + *out = (char*)emalloc(max_len); + if (!*out) { + zend_error(E_WARNING, "lz4_compress_raw : memory error"); + *out_len = 0; + return FAILURE; + } + + /* Compress directly into output buffer (no offset) */ + if (level == 0) { + *out_len = LZ4_compress_default(in, *out, in_len, max_len); + } else if (level > 0 && level <= PHP_LZ4_CLEVEL_MAX) { + *out_len = LZ4_compress_HC(in, *out, in_len, max_len, level); + } else { + zend_error(E_WARNING, + "lz4_compress_raw: compression level (%d) must be within 1..%d", + level, PHP_LZ4_CLEVEL_MAX); + efree(*out); + *out = NULL; + *out_len = 0; + return FAILURE; + } + + /* Check for compression errors */ + if (*out_len <= 0) { + zend_error(E_WARNING, "lz4_compress_raw : compression failed"); + efree(*out); + *out = NULL; + *out_len = 0; + return FAILURE; + } + + /* NOTE: *out_len is the actual compressed size (no header added) */ + return SUCCESS; +} + +/** + * Raw LZ4 block decompression (no size header) + * Requires max_size parameter (from ByteStorage envelope) + */ +static int php_lz4_uncompress_raw(const char* in, const int in_len, + const int max_size, + char** out, int* out_len) +{ + /* Validate max_size parameter (required for raw decompression) */ + if (max_size <= 0) { + zend_error(E_WARNING, + "lz4_uncompress_raw : max_size parameter is required and must be positive"); + return FAILURE; + } + + /* Allocate output buffer based on provided max_size */ + *out = (char*)malloc(max_size + 1); + if (!*out) { + zend_error(E_WARNING, "lz4_uncompress_raw : memory error"); + return FAILURE; + } + + /* Decompress from start of input (no offset) */ + *out_len = LZ4_decompress_safe(in, *out, in_len, max_size); + + /* Check decompression result */ + if (*out_len <= 0) { + zend_error(E_WARNING, + "lz4_uncompress_raw : decompression failed (corrupted data or wrong max_size)"); + free(*out); + *out = NULL; + *out_len = 0; + return FAILURE; + } + + return SUCCESS; +} + /** * @param max_block_size 4: 64KB, 5: 256KB, 6: 1MB, 7: 4MB, all other values: 64KB * @param checksums 0: none, 1: frame content, 2: each block, 3: frame content + each block @@ -587,6 +688,84 @@ static ZEND_FUNCTION(lz4_uncompress_frame) free(output); } +static ZEND_FUNCTION(lz4_compress_raw) +{ + zval *data; + char *output; + int output_len; + long level = 0; + + /* Parse parameters: data (required), level (optional, default 0) */ + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "z|l", &data, &level) == FAILURE) { + RETURN_FALSE; + } + + /* Validate data is a string */ + if (Z_TYPE_P(data) != IS_STRING) { + zend_error(E_WARNING, + "lz4_compress_raw : expects parameter to be string."); + RETURN_FALSE; + } + + /* Call internal compression function */ + if (php_lz4_compress_raw(Z_STRVAL_P(data), Z_STRLEN_P(data), + &output, &output_len, + (int)level) == FAILURE) { + RETURN_FALSE; + } + + /* Return compressed data */ +#if ZEND_MODULE_API_NO >= 20141001 + RETVAL_STRINGL(output, output_len); +#else + RETVAL_STRINGL(output, output_len, 1); +#endif + + efree(output); +} + +static ZEND_FUNCTION(lz4_uncompress_raw) +{ + zval *data; + int output_len; + char *output; +#if ZEND_MODULE_API_NO >= 20141001 + zend_long max_size; +#else + long max_size; +#endif + + /* Parse parameters: data (required), max_size (required) */ + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "zl", &data, &max_size) == FAILURE) { + RETURN_FALSE; + } + + /* Validate data is a string */ + if (Z_TYPE_P(data) != IS_STRING) { + zend_error(E_WARNING, + "lz4_uncompress_raw : expects parameter to be string."); + RETURN_FALSE; + } + + /* Call internal decompression function */ + if (php_lz4_uncompress_raw(Z_STRVAL_P(data), Z_STRLEN_P(data), + (const int)max_size, + &output, &output_len) == FAILURE) { + RETURN_FALSE; + } + + /* Return decompressed data */ +#if ZEND_MODULE_API_NO >= 20141001 + RETVAL_STRINGL(output, output_len); +#else + RETVAL_STRINGL(output, output_len, 1); +#endif + + free(output); +} + #if PHP_MAJOR_VERSION >= 7 && defined(HAVE_APCU_SUPPORT) static int APC_SERIALIZER_NAME(lz4)(APC_SERIALIZER_ARGS) { diff --git a/tests/raw_001.phpt b/tests/raw_001.phpt new file mode 100644 index 0000000..0a35695 --- /dev/null +++ b/tests/raw_001.phpt @@ -0,0 +1,26 @@ +--TEST-- +Test lz4_compress_raw() and lz4_uncompress_raw() : basic functionality +--SKIPIF-- + +--FILE-- + +--EXPECT-- +*** Testing lz4_compress_raw() and lz4_uncompress_raw() *** +Original: Hello, World! +Compressed hex: d048656c6c6f2c20576f726c6421 +Compressed size: 14 bytes +Decompressed: Hello, World! +Match: YES diff --git a/tests/raw_002.phpt b/tests/raw_002.phpt new file mode 100644 index 0000000..afda7aa --- /dev/null +++ b/tests/raw_002.phpt @@ -0,0 +1,33 @@ +--TEST-- +Test lz4_compress_raw() : test vectors (Python compatibility) +--SKIPIF-- + +--FILE-- + 'Hello, World!', 'hex' => 'd048656c6c6f2c20576f726c6421'], + ['data' => 'test', 'hex' => '4074657374'], + ['data' => str_repeat('A', 100), 'hex' => '1f4101004b504141414141'], +]; + +foreach ($tests as $i => $test) { + $compressed = lz4_compress_raw($test['data']); + $actual_hex = bin2hex($compressed); + + echo "Test " . ($i + 1) . ": "; + if ($actual_hex === $test['hex']) { + echo "PASS\n"; + } else { + echo "FAIL\n"; + echo " Expected: {$test['hex']}\n"; + echo " Actual: $actual_hex\n"; + } +} +?> +--EXPECT-- +*** Testing lz4_compress_raw() with known test vectors *** +Test 1: PASS +Test 2: PASS +Test 3: PASS diff --git a/tests/raw_003.phpt b/tests/raw_003.phpt new file mode 100644 index 0000000..45713e1 --- /dev/null +++ b/tests/raw_003.phpt @@ -0,0 +1,43 @@ +--TEST-- +Test lz4_uncompress_raw() : error handling +--SKIPIF-- + +--FILE-- + +--EXPECTF-- +*** Testing lz4_uncompress_raw() error handling *** + +Warning: lz4_uncompress_raw : max_size parameter is required and must be positive in %s on line %d +Zero max_size: REJECTED + +Warning: lz4_uncompress_raw : max_size parameter is required and must be positive in %s on line %d +Negative max_size: REJECTED + +Warning: lz4_uncompress_raw : decompression failed (corrupted data or wrong max_size) in %s on line %d +Corrupted data: REJECTED + +Warning: lz4_uncompress_raw : decompression failed (corrupted data or wrong max_size) in %s on line %d +Wrong max_size: REJECTED +Done diff --git a/tests/raw_004.phpt b/tests/raw_004.phpt new file mode 100644 index 0000000..6b0d926 --- /dev/null +++ b/tests/raw_004.phpt @@ -0,0 +1,46 @@ +--TEST-- +Test lz4_compress_raw() : compression levels +--SKIPIF-- + +--FILE-- + +--EXPECTF-- +*** Testing lz4_compress_raw() with different compression levels *** +Level 0 (default): %d bytes +Level 1 (HC): %d bytes +Level 9 (HC max): %d bytes +Level 0 roundtrip: PASS +Level 1 roundtrip: PASS +Level 9 roundtrip: PASS + +Warning: lz4_compress_raw: compression level (999) must be within 1..%d in %s on line %d +Invalid level 999: REJECTED