From a2507d5bbdb451f25eea9c39e2f2c36f9a479e50 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 15 Oct 2025 13:21:28 -0400 Subject: [PATCH 001/109] v2 python prototype. --- Makefile | 8 + docs/source/specification.rst | 5 +- tamp/cli/main.py | 4 + tamp/compressor.py | 387 ++++++++++++++++++++++++++++---- tamp/decompressor.py | 114 +++++++--- tools/print_compressed_sizes.py | 43 ++++ 6 files changed, 486 insertions(+), 75 deletions(-) create mode 100644 tools/print_compressed_sizes.py diff --git a/Makefile b/Makefile index 9e21e42e..e78e8a4a 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,8 @@ help: @echo " make tamp-c-library Build static C library" @echo " make website-build Build website for deployment" +.PHONY: clean test collect-data venv download + ########################### # MicroPython Native Module @@ -182,6 +184,12 @@ build/enwik8-100kb: download-enwik8 build/enwik8-100kb.tamp: build/enwik8-100kb @poetry run tamp compress build/enwik8-100kb -o build/enwik8-100kb.tamp +download-micropython: + mkdir -p build + cd build && curl -O https://micropython.org/resources/firmware/RPI_PICO-20250415-v1.25.0.uf2 + +download: download-enwik8 download-silesia download-micropython + ################## # Python / Testing diff --git a/docs/source/specification.rst b/docs/source/specification.rst index f6128939..89a880a5 100644 --- a/docs/source/specification.rst +++ b/docs/source/specification.rst @@ -26,7 +26,8 @@ The bit-location 0 is equivalent to typical MSb position 7 of the first byte. | [2] | custom_dictionary | A custom dictionary initialization method was used | | | | and must be provided at decompression. | +---------+-------------------+---------------------------------------------------------------------+ -| [1] | reserved | Reserved for future use. Must be 0. | +| [1] | v2 | Enables Tamp v2 features. Generally improves compression, but the | +| | | decompressor needs to support it (introduced in tamp v1.11.0). 
| +---------+-------------------+---------------------------------------------------------------------+ | [0] | more_header | If ``True``, then the next byte in the stream is more header data. | | | | Currently always ``False``, but allows for future expandability. | @@ -60,7 +61,7 @@ Modifications are made to make the implementation simpler/faster. and points at the offset from the beginning of the dictionary buffer to the pattern. The shortest pattern-length is either going to be 2 or 3 bytes, depending on ``window`` and ``literal`` parameters. The shortest pattern-length encoding must be shorter than - an equivalent stream of literals. The longest pattern-length will the minimum + an equivalent stream of literals. The longest pattern-length is the minimum pattern-length plus 13. Classically, the ``offset`` is from the current position in the buffer. Doing so results diff --git a/tamp/cli/main.py b/tamp/cli/main.py index 27275c25..e2054b6f 100644 --- a/tamp/cli/main.py +++ b/tamp/cli/main.py @@ -119,6 +119,7 @@ def compress( ), ] = 8, lazy_matching: bool = False, + v1: bool = False, implementation: ImplementationType = None, ): """Compress an input file or stream. @@ -135,6 +136,8 @@ def compress( Number of bits used to represent a literal. lazy_matching: bool Use roughly 50% more cpu to get 0~2% better compression. + v1: bool + Use version 1 compression format. implementation: Optional[Literal["c", "python"]] Explicitly specify which implementation to use (c or python). Defaults to auto-detection. """ @@ -145,6 +148,7 @@ def compress( window=window, literal=literal, lazy_matching=lazy_matching, + v2=not v1, ) write(output, output_bytes) diff --git a/tamp/compressor.py b/tamp/compressor.py index 91c5516d..f1d4dc05 100644 --- a/tamp/compressor.py +++ b/tamp/compressor.py @@ -1,3 +1,11 @@ +"""Pure Python Tamp Compressor Reference Implementation. + +The goal of this module is for clarity and to be able to easily test new ideas. 
+Do not optimize this file for speed, unless it still maintains clarity. + +Some speed architectural optimizations might be tested here before implementing in other languages. +""" + from collections import deque from io import BytesIO @@ -13,26 +21,52 @@ from . import ExcessBitsError, bit_size, compute_min_pattern_size, initialize_dictionary -# encodes [min_pattern_bytes, min_pattern_bytes + 13] pattern lengths -_huffman_codes = b"\x00\x03\x08\x0b\x14$&+KT\x94\x95\xaa'" +# encodes [0, 14] pattern lengths +_huffman_codes = b"\x00\x03\x08\x0b\x14$&+KT\x94\x95\xaa'\xab" # These bit lengths pre-add the 1 bit for the 0-value is_literal flag. -_huffman_bits = b"\x02\x03\x05\x05\x06\x07\x07\x07\x08\x08\x09\x09\x09\x07" +_huffman_bits = b"\x02\x03\x05\x05\x06\x07\x07\x07\x08\x08\x09\x09\x09\x07\x09" _FLUSH_CODE = 0xAB # 8 bits +_RLE_SYMBOL = 12 +_RLE_MAX_WINDOW = 8 # Maximum number of RLE bytes to write to the window. +_EXTENDED_MATCH_SYMBOL = 13 +_LEADING_EXTENDED_MATCH_HUFFMAN_BITS = 3 +_LEADING_RLE_HUFFMAN_BITS = 4 + + +def _determine_rle_breakeven_point(min_pattern_size, window_bits): + # Determines if a pattern-match would be shorter than a RLE match. 
+ # See how many bits this encoding would be with RLE + rle_length_bits = {} + for i in range(min_pattern_size, min_pattern_size + 11 + 1): + rle_length_bits[i] = 8 + _LEADING_RLE_HUFFMAN_BITS + _huffman_bits[(i - 1) >> _LEADING_RLE_HUFFMAN_BITS] + + pattern_length_bits = {} + for i in range(min_pattern_size, min_pattern_size + 11 + 1): + pattern_length_bits[i] = _huffman_bits[i - min_pattern_size] + window_bits + + breakeven_point = 0 + for pattern_size in sorted(pattern_length_bits): + if pattern_length_bits[pattern_size] < rle_length_bits[pattern_size]: + breakeven_point = pattern_size + + return breakeven_point class _BitWriter: """Writes bits to a stream.""" - def __init__(self, f, close_f_on_close=False): + def __init__(self, f, *, close_f_on_close: bool = False): self.close_f_on_close = close_f_on_close self.f = f - self.buffer = 0 # Basically a uint24 + self.buffer = 0 # Basically a uint32 self.bit_pos = 0 - def write_huffman(self, pattern_size): + def write_huffman_and_literal_flag(self, pattern_size): + # pattern_size in range [0, 14] return self.write(_huffman_codes[pattern_size], _huffman_bits[pattern_size]) def write(self, bits, num_bits, flush=True): + bits = int(bits) bits &= (1 << num_bits) - 1 self.bit_pos += num_bits self.buffer |= bits << (32 - self.bit_pos) @@ -75,7 +109,7 @@ def __init__(self, buffer): self.size = len(buffer) self.pos = 0 # Always pointing to the byte-to-be-overwritten - def write_byte(self, byte): # ~10% of time + def write_byte(self, byte): self.buffer[self.pos] = byte self.pos = (self.pos + 1) % self.size @@ -90,6 +124,25 @@ def index(self, pattern, start): raise ValueError("substring not found") return result + def write_from_self(self, position, size): + data = [self.buffer[(position + i) % self.size] for i in range(size)] + for x in data: + self.write_byte(x) + + def get(self, index, size): + out = bytearray(size) + for i in range(size): + pos = (index + i) % self.size + out[i] = self.buffer[pos] + return bytes(out) + + 
@property + def last_written_byte(self) -> int: + pos = self.pos - 1 + if pos < 0: + pos = self.size - 1 + return self.buffer[pos] # TODO: unit-test this thoroughly on initial start! + class Compressor: """Compresses data to a file or stream.""" @@ -102,6 +155,7 @@ def __init__( literal: int = 8, dictionary: Optional[bytearray] = None, lazy_matching: bool = False, + v2: bool = True, ): """ Parameters @@ -129,11 +183,26 @@ def __init__( lazy_matching: bool Use roughly 50% more cpu to get 0~2% better compression. """ - if lazy_matching: - raise NotImplementedError("lazy matching not implemented in pure python implementation.") + self.window_bits = window + self.literal_bits = literal + self.min_pattern_size = compute_min_pattern_size(window, literal) + self.v2: bool = v2 + + self._rle_count = 0 + self._rle_last_written = False # The previous write was an RLE token + + # "+1" Because a RLE of 1 is not valid. + self._rle_max_size = (13 << _LEADING_RLE_HUFFMAN_BITS) + (1 << _LEADING_RLE_HUFFMAN_BITS) + 1 + self._rle_breakeven = _determine_rle_breakeven_point(self.min_pattern_size, self.window_bits) + + self._extended_match_count = 0 + self._extended_match_position = 0 + + self.lazy_matching = lazy_matching + self._cached_match_index = -1 + self._cached_match_size = 0 if not hasattr(f, "write"): # It's probably a path-like object. 
- # TODO: then close it on close f = open(str(f), "wb") close_f_on_close = True else: @@ -143,11 +212,15 @@ def __init__( if dictionary and bit_size(len(dictionary) - 1) != window: raise ValueError("Dictionary-window size mismatch.") - self.window_bits = window - self.literal_bits = literal - - self.min_pattern_size = compute_min_pattern_size(window, literal) - self.max_pattern_size = self.min_pattern_size + 13 + if self.v2: + self.max_pattern_size = ( + self.min_pattern_size + + 11 + + (13 << _LEADING_EXTENDED_MATCH_HUFFMAN_BITS) + + (1 << _LEADING_EXTENDED_MATCH_HUFFMAN_BITS) + ) + else: + self.max_pattern_size = self.min_pattern_size + 13 self.literal_flag = 1 << self.literal_bits @@ -155,26 +228,171 @@ def __init__( buffer=dictionary if dictionary else initialize_dictionary(1 << window), ) - self._input_buffer = deque(maxlen=self.max_pattern_size) + self._input_buffer = deque(maxlen=16) # matching the C implementation # Callbacks for debugging/metric collection; can be externally set. - self.token_cb = None + self.match_cb = None + self.extended_match_cb = None self.literal_cb = None self.flush_cb = None + self.rle_cb = None + + # For debugging: how many uncompressed bytes have we consumed so far. 
+ self.input_index = 0 # Write header self._bit_writer.write(window - 8, 3, flush=False) self._bit_writer.write(literal - 5, 2, flush=False) self._bit_writer.write(bool(dictionary), 1, flush=False) - self._bit_writer.write(0, 1, flush=False) # Reserved + self._bit_writer.write(self.v2, 1, flush=False) self._bit_writer.write(0, 1, flush=False) # No other header bytes + def _validate_no_match_overlap(self, write_pos, match_index, match_size): + """Check if writing a single byte will overlap with a future match section.""" + return write_pos < match_index or write_pos >= match_index + match_size + def _compress_input_buffer_single(self) -> int: - target = bytes(self._input_buffer) bytes_written = 0 + + if not self._input_buffer: + return bytes_written + + if self._extended_match_count: + while self._input_buffer: + if (self._extended_match_position + self._extended_match_count) >= self._window_buffer.size: + # wrap-around search: it's fine to check for the wrap now because it's super cheap here. + pos = (self._extended_match_position + self._extended_match_count) % self._window_buffer.size + if self._window_buffer.buffer[pos] == self._input_buffer[0]: + self._input_buffer.popleft() + self._extended_match_count += 1 + if self._extended_match_count == self.max_pattern_size: + bytes_written += self._write_extended_match() + return bytes_written + continue + # We've found the end of the match + bytes_written += self._write_extended_match() + return bytes_written + else: + # Search the remainder of the window buffer. 
+ target = self._window_buffer.get(self._extended_match_position, self._extended_match_count) + target += bytes([self._input_buffer[0]]) + search_i, match = self._search(target, start=self._extended_match_position) + match_size = len(match) + if match_size > self._extended_match_count: + self._input_buffer.popleft() + self._extended_match_count = match_size + self._extended_match_position = search_i + if self._extended_match_count == self.max_pattern_size: + bytes_written += self._write_extended_match() + return bytes_written + continue + else: + # We've found the end of the match + bytes_written += self._write_extended_match() + return bytes_written + else: + # We ran out of input_buffer, return so caller can re-populate the input_buffer + return bytes_written + + target = bytes(self._input_buffer) search_i = 0 match_size = 1 - for match_size in range(self.min_pattern_size, len(target) + 1): + + if self.v2: + # RLE same-character-counting logic + while ( + target and target[0] == self._window_buffer.last_written_byte and self._rle_count < self._rle_max_size + ): + self._rle_count += 1 + self._input_buffer.popleft() + target = bytes(self._input_buffer) + if not target and self._rle_count != self._rle_max_size: + # Need more input to see if the RLE continues + return bytes_written + if self._rle_count == 1: + # This is not RLE; attempt to pattern-match or just write literals. + self._input_buffer.appendleft(self._window_buffer.last_written_byte) + target = bytes(self._input_buffer) + self._rle_count = 0 + elif self._rle_count: + if self._rle_count > self._rle_breakeven: + # It's certainly better to do a RLE write than searching for a pattern. + bytes_written += self._write_rle() + return bytes_written + else: + # We'll see if pattern-matching offers a better encoding. 
+ target = bytes([self._window_buffer.last_written_byte]) * self._rle_count + + # Check if we have a cached match from lazy matching + if self.lazy_matching and self._cached_match_index >= 0: + search_i = self._cached_match_index + match_size = self._cached_match_size + match = self._window_buffer.get(search_i, match_size) + self._cached_match_index = -1 # Clear cache after using + else: + # Perform normal pattern-matching + search_i, match = self._search(target, start=0) + match_size = len(match) + + if self._rle_count: + # Check to see if the found pattern-match is more efficient than the RLE encoding. + assert self._rle_count >= 2 # noqa: S101 + if match_size >= self._rle_count: + # Pattern is better than RLE + bytes_written += self._write_match(search_i, match) + self._rle_count = 0 + return bytes_written + else: + # RLE is better than pattern + return self._write_rle() + + # Lazy matching logic + if ( + self.lazy_matching + and match_size >= self.min_pattern_size + and match_size <= 8 + and len(self._input_buffer) > match_size + 2 + ): + # Check if next position has a better match + next_target = bytes(list(self._input_buffer)[1:]) # Skip first byte + next_search_i, next_match = self._search(next_target, start=0) + next_match_size = len(next_match) + + # If next position has a better match, and the match doesn't overlap with the literal we are writing + if next_match_size > match_size and self._validate_no_match_overlap( + self._window_buffer.pos, next_search_i, next_match_size + ): + # Write literal at current position and cache the next match + literal = self._input_buffer.popleft() + bytes_written += self._write_literal(literal) + self._cached_match_index = next_search_i + self._cached_match_size = next_match_size + return bytes_written + + if match_size >= self.min_pattern_size: + if self.v2 and match_size > (self.min_pattern_size + 11): + # Protects +12 to be RLE symbol, and +13 to be extended match symbol + self._extended_match_position = search_i + 
self._extended_match_count = match_size + else: + bytes_written += self._write_match(search_i, match) + + self._rle_last_written = False + for _ in range(match_size): + self._input_buffer.popleft() + else: + literal = self._input_buffer.popleft() + bytes_written += self._write_literal(literal) + + return bytes_written + + def _search(self, target: bytes, start=0): + match_size = 0 + search_i = start + for match_size in range( + self.min_pattern_size, + min(len(target), self.max_pattern_size) + 1, + ): match = target[:match_size] try: search_i = self._window_buffer.index(match, search_i) @@ -183,30 +401,95 @@ def _compress_input_buffer_single(self) -> int: match_size -= 1 break match = target[:match_size] + return search_i, match - if match_size >= self.min_pattern_size: - if self.token_cb: - self.token_cb( - search_i, - match_size, - match, - ) - bytes_written += self._bit_writer.write_huffman(match_size - self.min_pattern_size) - bytes_written += self._bit_writer.write(search_i, self.window_bits) - self._window_buffer.write_bytes(match) + def _write_extended_huffman(self, value, leading_bits): + bytes_written = 0 + # the upper bits can have values [0, 13] + mask = (1 << leading_bits) - 1 + if value > ((13 << leading_bits) + mask) or value < 0: + raise ValueError + code_index = value >> leading_bits + # Don't use write_huffman_and_literal_flag since we don't want to write a flag. 
+ bytes_written += self._bit_writer.write(_huffman_codes[code_index], _huffman_bits[code_index] - 1) + bytes_written += self._bit_writer.write(value & mask, leading_bits) + return bytes_written - for _ in range(match_size): - self._input_buffer.popleft() + def _write_extended_match(self): + bytes_written = 0 + if self.extended_match_cb: + string = self._window_buffer.get(self._extended_match_position, self._extended_match_count) + self.extended_match_cb( + self._window_buffer.pos, self._extended_match_position, self._extended_match_count, string + ) + bytes_written += self._bit_writer.write_huffman_and_literal_flag(_EXTENDED_MATCH_SYMBOL) + bytes_written += self._bit_writer.write(self._extended_match_position, self.window_bits) + bytes_written += self._write_extended_huffman( + self._extended_match_count - self.min_pattern_size - 11 - 1, + _LEADING_EXTENDED_MATCH_HUFFMAN_BITS, + ) + + self._window_buffer.write_from_self(self._extended_match_position, self._extended_match_count) + + # Reset state + self._extended_match_count = 0 + self._extended_match_position = 0 # Technically not necessary. 
+ + return bytes_written + + def _write_literal(self, literal) -> int: + bytes_written = 0 + if self.literal_cb: + self.literal_cb(literal) + if literal >> self.literal_bits: + raise ExcessBitsError + + bytes_written += self._bit_writer.write(literal | self.literal_flag, self.literal_bits + 1) + self._window_buffer.write_byte(literal) + self._rle_last_written = False + return bytes_written + + def _write_match(self, search_i, match) -> int: + match_size = len(match) + + if self.match_cb: + self.match_cb( + self._window_buffer.pos, + search_i, + match_size, + match, + ) + + bytes_written = 0 + bytes_written += self._bit_writer.write_huffman_and_literal_flag(match_size - self.min_pattern_size) + bytes_written += self._bit_writer.write(search_i, self.window_bits) + self._window_buffer.write_bytes(match) + self._rle_last_written = False + return bytes_written + + def _write_rle(self) -> int: + bytes_written = 0 + last_written_byte = self._window_buffer.last_written_byte + + if self._rle_count == 0: + raise ValueError("No RLE to write.") + elif self._rle_count == 1: + # Just write a literal + bytes_written += self._write_literal(last_written_byte) else: - char = self._input_buffer.popleft() - if self.literal_cb: - self.literal_cb(char) - if char >> self.literal_bits: - raise ExcessBitsError + if self.rle_cb: + self.rle_cb(self._rle_count, last_written_byte) + bytes_written += self._bit_writer.write_huffman_and_literal_flag(_RLE_SYMBOL) + bytes_written += self._write_extended_huffman(self._rle_count - 2, _LEADING_RLE_HUFFMAN_BITS) - bytes_written += self._bit_writer.write(char | self.literal_flag, self.literal_bits + 1) - self._window_buffer.write_byte(char) + if not self._rle_last_written: + # Only write up to 8 bytes, and only if we didn't already do this. + # This prevents filling up the window buffer with unhelpful data. 
+ self._window_buffer.write_bytes(bytes([last_written_byte]) * min(self._rle_count, _RLE_MAX_WINDOW)) + self._rle_last_written = True + + self._rle_count = 0 return bytes_written def write(self, data: Union[bytes, bytearray]) -> int: @@ -225,8 +508,12 @@ def write(self, data: Union[bytes, bytearray]) -> int: """ bytes_written = 0 - for char in data: - self._input_buffer.append(char) + self.input_index = 0 + while self.input_index < len(data): + if len(self._input_buffer) != self._input_buffer.maxlen: + self._input_buffer.append(data[self.input_index]) + self.input_index += 1 + if len(self._input_buffer) == self._input_buffer.maxlen: bytes_written += self._compress_input_buffer_single() @@ -255,7 +542,18 @@ def flush(self, write_token: bool = True) -> int: self.flush_cb() while self._input_buffer: bytes_written += self._compress_input_buffer_single() - bytes_written += self._bit_writer.flush(write_token=write_token) + if self.v2 and self._rle_count: + bytes_written += self._write_rle() + + # Clear any cached lazy matching state + if self.lazy_matching: + self._cached_match_index = -1 + self._cached_match_size = 0 + + bytes_written_flush = self._bit_writer.flush(write_token=write_token) + bytes_written += bytes_written_flush + if bytes_written_flush: + self._rle_last_written = False return bytes_written def close(self) -> int: @@ -300,6 +598,7 @@ def compress( literal: int = 8, dictionary: Optional[bytearray] = None, lazy_matching: bool = False, + v2: bool = True, ) -> bytes: """Single-call to compress data. @@ -326,6 +625,8 @@ def compress( first be initialized with :func:`~tamp.initialize_dictionary` lazy_matching: bool Use roughly 50% more cpu to get 0~2% better compression. + v2: bool + Use v2 compression format. Defaults to True. 
Returns ------- @@ -340,6 +641,7 @@ def compress( literal=literal, dictionary=dictionary, lazy_matching=lazy_matching, + v2=v2, ) c.write(data) else: @@ -349,6 +651,7 @@ def compress( literal=literal, dictionary=dictionary, lazy_matching=lazy_matching, + v2=v2, ) c.write(data) c.flush(write_token=False) diff --git a/tamp/decompressor.py b/tamp/decompressor.py index fb2fec7c..947dd023 100644 --- a/tamp/decompressor.py +++ b/tamp/decompressor.py @@ -10,6 +10,13 @@ _CHUNK_SIZE = 1 << 20 _FLUSH = object() +# These variables must match compressor.py +_RLE_SYMBOL = 12 +_EXTENDED_MATCH_SYMBOL = 13 +_RLE_MAX_WINDOW = 8 # Maximum number of RLE bytes to write to the window. +_LEADING_EXTENDED_MATCH_HUFFMAN_BITS = 3 +_LEADING_RLE_HUFFMAN_BITS = 4 + # Each key here are the huffman codes or'd with 0x80 # This is so that each lookup is easy/quick. _huffman_lookup = { @@ -57,15 +64,15 @@ def read(self, num_bits): if not byte: raise EOFError byte_value = int.from_bytes(byte, "little") - self.buffer |= byte_value << (24 - self.bit_pos) + self.buffer |= byte_value << (56 - self.bit_pos) self.bit_pos += 8 if self.backup_buffer is not None and self.backup_bit_pos is not None: - self.backup_buffer |= byte_value << (24 - self.backup_bit_pos) + self.backup_buffer |= byte_value << (56 - self.backup_bit_pos) self.backup_bit_pos += 8 - result = self.buffer >> (32 - num_bits) - mask = (1 << (32 - num_bits)) - 1 + result = self.buffer >> (64 - num_bits) + mask = (1 << (64 - num_bits)) - 1 self.buffer = (self.buffer & mask) << num_bits self.bit_pos -= num_bits @@ -120,6 +127,20 @@ def write_bytes(self, data): for byte in data: self.write_byte(byte) + def get(self, index, size): + out = bytearray(size) + for i in range(size): + pos = (index + i) % self.size + out[i] = self.buffer[pos] + return bytes(out) + + @property + def last_written_byte(self) -> int: + pos = self.pos - 1 + if pos < 0: + pos = self.size - 1 + return self.buffer[pos] # TODO: unit-test this thoroughly on initial start! 
+ class Decompressor: """Decompresses a file or stream of tamp-compressed data. @@ -158,11 +179,9 @@ def __init__(self, f, *, dictionary: Optional[bytearray] = None): self.window_bits = self._bit_reader.read(3) + 8 self.literal_bits = self._bit_reader.read(2) + 5 uses_custom_dictionary = self._bit_reader.read(1) - reserved = self._bit_reader.read(1) + self.v2 = self._bit_reader.read(1) more_header_bytes = self._bit_reader.read(1) - - if reserved: - raise NotImplementedError + self._rle_last_written = False if more_header_bytes: raise NotImplementedError @@ -176,6 +195,7 @@ def __init__(self, f, *, dictionary: Optional[bytearray] = None): self.min_pattern_size = compute_min_pattern_size(self.window_bits, self.literal_bits) + # Used to store decoded bytes that do not currently fit in the output buffer. self.overflow = bytearray() def readinto(self, buf: bytearray) -> int: @@ -191,49 +211,81 @@ def readinto(self, buf: bytearray) -> int: int Number of bytes decompressed into buffer. """ + bytes_written = 0 + if len(self.overflow) > len(buf): buf[:] = self.overflow[: len(buf)] - written = len(buf) + bytes_written += len(buf) self.overflow = self.overflow[len(buf) :] - return written + return bytes_written elif self.overflow: buf[: len(self.overflow)] = self.overflow - written = len(self.overflow) + bytes_written += len(self.overflow) self.overflow = bytearray() - else: - written = 0 - while written < len(buf): + def write_to_output(string): + nonlocal bytes_written + match_size = len(string) + to_buf = min(len(buf) - bytes_written, match_size) + buf[bytes_written : bytes_written + to_buf] = string[:to_buf] + bytes_written += to_buf + if to_buf < match_size: + self.overflow[:] = string[to_buf:] + return False # stop decoding + return True + + while bytes_written < len(buf): try: with self._bit_reader: is_literal = self._bit_reader.read(1) if is_literal: - c = self._bit_reader.read(self.literal_bits) - self._window_buffer.write_byte(c) - buf[written] = c - written += 1 + 
string = bytes([self._bit_reader.read(self.literal_bits)]) + self._window_buffer.write_bytes(string) + self._rle_last_written = False else: match_size = self._bit_reader.read_huffman() if match_size is _FLUSH: self._bit_reader.clear() continue - match_size += self.min_pattern_size - index = self._bit_reader.read(self.window_bits) - - string = self._window_buffer.buffer[index : index + match_size] - self._window_buffer.write_bytes(string) - - to_buf = min(len(buf) - written, match_size) - buf[written : written + to_buf] = string[:to_buf] - written += to_buf - if to_buf < match_size: - self.overflow[:] = string[to_buf:] - break + if self.v2 and match_size > 11: + if match_size == _RLE_SYMBOL: + rle_count = self._bit_reader.read_huffman() + rle_count <<= _LEADING_RLE_HUFFMAN_BITS + rle_count += self._bit_reader.read(_LEADING_RLE_HUFFMAN_BITS) + rle_count += 1 + 1 + symbol = self._window_buffer.last_written_byte + string = bytes([symbol]) * rle_count + if not self._rle_last_written: + self._window_buffer.write_bytes(string[: min(rle_count, _RLE_MAX_WINDOW)]) + self._rle_last_written = True + elif match_size == _EXTENDED_MATCH_SYMBOL: + index = self._bit_reader.read(self.window_bits) + match_size = self._bit_reader.read_huffman() + match_size <<= _LEADING_EXTENDED_MATCH_HUFFMAN_BITS + match_size += self._bit_reader.read(_LEADING_EXTENDED_MATCH_HUFFMAN_BITS) + match_size += self.min_pattern_size + 11 + 1 + + string = self._window_buffer.get(index, match_size) + + self._window_buffer.write_bytes(string) + self._rle_last_written = False + else: + raise ValueError("unreachable") + else: + match_size += self.min_pattern_size + index = self._bit_reader.read(self.window_bits) + + string = self._window_buffer.get(index, match_size) + self._window_buffer.write_bytes(string) + self._rle_last_written = False + + if not write_to_output(string): + break except EOFError: break - return written + return bytes_written def read(self, size: int = -1) -> bytearray: """Decompresses data to 
bytes. diff --git a/tools/print_compressed_sizes.py b/tools/print_compressed_sizes.py new file mode 100644 index 00000000..ea2c71e2 --- /dev/null +++ b/tools/print_compressed_sizes.py @@ -0,0 +1,43 @@ +""" +Print compressed sizes for test files used in optimize-extended-huffman.py. + +This script compresses the same files that optimize-extended-huffman.py uses +and prints the compressed size for each file with thousands separators. +""" + +from pathlib import Path + +import tamp.compressor + + +def main(): + # Define test files (same as optimize-extended-huffman.py) + build_dir = Path(__file__).parent.parent / "build" + test_files = [build_dir / "enwik8", build_dir / "RPI_PICO-20250415-v1.25.0.uf2", *(build_dir / "silesia").iterdir()] + test_files.sort() + + ratios = [] + for file_path in test_files: + # Read and compress the file + data = file_path.read_bytes() + if len(data) == 0: + print(f"{file_path.name}: Empty file") + continue + + compressed_data = tamp.compressor.compress(data) + + original_size = len(data) + compressed_size = len(compressed_data) + + ratio = original_size / compressed_size + ratios.append(ratio) + + # Print with thousands separators + print(f"{file_path.name}: {compressed_size:,} (**{ratio:.3f}**)") + + avg = sum(ratios) / len(ratios) + print(f"Average Ratio: {avg}") + + +if __name__ == "__main__": + main() From 33bc24aa8a12047b8e51d55d287911a646eba507 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Fri, 23 Jan 2026 20:02:04 -0500 Subject: [PATCH 002/109] update pi pico firmware download to datasets. 
--- Makefile | 4 ++-- tools/print_compressed_sizes.py | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index e78e8a4a..7b668924 100644 --- a/Makefile +++ b/Makefile @@ -185,8 +185,8 @@ build/enwik8-100kb.tamp: build/enwik8-100kb @poetry run tamp compress build/enwik8-100kb -o build/enwik8-100kb.tamp download-micropython: - mkdir -p build - cd build && curl -O https://micropython.org/resources/firmware/RPI_PICO-20250415-v1.25.0.uf2 + mkdir -p datasets + cd datasets && curl -O https://micropython.org/resources/firmware/RPI_PICO-20250415-v1.25.0.uf2 download: download-enwik8 download-silesia download-micropython diff --git a/tools/print_compressed_sizes.py b/tools/print_compressed_sizes.py index ea2c71e2..7cb809eb 100644 --- a/tools/print_compressed_sizes.py +++ b/tools/print_compressed_sizes.py @@ -12,8 +12,12 @@ def main(): # Define test files (same as optimize-extended-huffman.py) - build_dir = Path(__file__).parent.parent / "build" - test_files = [build_dir / "enwik8", build_dir / "RPI_PICO-20250415-v1.25.0.uf2", *(build_dir / "silesia").iterdir()] + datasets_dir = Path(__file__).parent.parent / "datasets" + test_files = [ + datasets_dir / "enwik8", + datasets_dir / "RPI_PICO-20250415-v1.25.0.uf2", + *(datasets_dir / "silesia").iterdir(), + ] test_files.sort() ratios = [] From 2c461841e20330fe7857707771c2928d20f85f56 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Fri, 23 Jan 2026 21:13:27 -0500 Subject: [PATCH 003/109] remove wrap-around logic; adds additional complications for minimal gains. 
--- tamp/compressor.py | 49 +++++++++++++++++++--------------------------- 1 file changed, 20 insertions(+), 29 deletions(-) diff --git a/tamp/compressor.py b/tamp/compressor.py index f1d4dc05..722bfbca 100644 --- a/tamp/compressor.py +++ b/tamp/compressor.py @@ -260,39 +260,30 @@ def _compress_input_buffer_single(self) -> int: if self._extended_match_count: while self._input_buffer: if (self._extended_match_position + self._extended_match_count) >= self._window_buffer.size: - # wrap-around search: it's fine to check for the wrap now because it's super cheap here. - pos = (self._extended_match_position + self._extended_match_count) % self._window_buffer.size - if self._window_buffer.buffer[pos] == self._input_buffer[0]: - self._input_buffer.popleft() - self._extended_match_count += 1 - if self._extended_match_count == self.max_pattern_size: - bytes_written += self._write_extended_match() - return bytes_written - continue - # We've found the end of the match + # Reached window boundary - emit match (no wrap-around, only 0.02% compression loss) bytes_written += self._write_extended_match() return bytes_written - else: - # Search the remainder of the window buffer. - target = self._window_buffer.get(self._extended_match_position, self._extended_match_count) - target += bytes([self._input_buffer[0]]) - search_i, match = self._search(target, start=self._extended_match_position) - match_size = len(match) - if match_size > self._extended_match_count: - self._input_buffer.popleft() - self._extended_match_count = match_size - self._extended_match_position = search_i - if self._extended_match_count == self.max_pattern_size: - bytes_written += self._write_extended_match() - return bytes_written - continue - else: - # We've found the end of the match + + # Search the remainder of the window buffer for a longer match. 
+ target = self._window_buffer.get(self._extended_match_position, self._extended_match_count) + target += bytes([self._input_buffer[0]]) + search_i, match = self._search(target, start=self._extended_match_position) + match_size = len(match) + if match_size > self._extended_match_count: + self._input_buffer.popleft() + self._extended_match_count = match_size + self._extended_match_position = search_i + if self._extended_match_count == self.max_pattern_size: bytes_written += self._write_extended_match() return bytes_written - else: - # We ran out of input_buffer, return so caller can re-populate the input_buffer - return bytes_written + continue + else: + # We've found the end of the match + bytes_written += self._write_extended_match() + return bytes_written + + # We ran out of input_buffer, return so caller can re-populate the input_buffer + return bytes_written target = bytes(self._input_buffer) search_i = 0 From e503cddd286eca6451ee5c989ee9fde147988222 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 08:27:12 -0500 Subject: [PATCH 004/109] Prepare cython bindings for v2 flag. --- tamp/_c_compressor.pyx | 4 ++++ tamp/ctamp.pxd | 1 + 2 files changed, 5 insertions(+) diff --git a/tamp/_c_compressor.pyx b/tamp/_c_compressor.pyx index a3e403de..ae690257 100644 --- a/tamp/_c_compressor.pyx +++ b/tamp/_c_compressor.pyx @@ -35,9 +35,13 @@ cdef class Compressor: int literal=8, dictionary=None, bool lazy_matching=False, + bool v2=False, ): cdef ctamp.TampConf conf + if v2: + raise NotImplementedError("v2 compression not yet supported in C compressor. 
Use --v1 flag or --implementation python.") + if dictionary and bit_size(len(dictionary) - 1) != window: raise ValueError("Dictionary-window size mismatch.") diff --git a/tamp/ctamp.pxd b/tamp/ctamp.pxd index 087ff62c..cd563ee1 100644 --- a/tamp/ctamp.pxd +++ b/tamp/ctamp.pxd @@ -6,6 +6,7 @@ cdef extern from "tamp/common.h": int window int literal bool use_custom_dictionary + bool v2 # v2 format (RLE, extended match). Read from header bit [1]. # The lazy_matching field is conditionally compiled based on TAMP_LAZY_MATCHING # We declare it here, but accessing it when the macro is disabled will cause compile errors # This is handled in the Cython code by always setting it when the struct is initialized From 8dba220ef02ce34a6a0b4143ce085bbb5fd046ec Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 08:28:38 -0500 Subject: [PATCH 005/109] add missing extended-match-count flush. --- tamp/compressor.py | 2 ++ tests/test_cli.py | 4 ++-- tests/test_compressor.py | 18 +++++++++--------- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/tamp/compressor.py b/tamp/compressor.py index 722bfbca..7b4b2a79 100644 --- a/tamp/compressor.py +++ b/tamp/compressor.py @@ -535,6 +535,8 @@ def flush(self, write_token: bool = True) -> int: bytes_written += self._compress_input_buffer_single() if self.v2 and self._rle_count: bytes_written += self._write_rle() + if self.v2 and self._extended_match_count: + bytes_written += self._write_extended_match() # Clear any cached lazy matching state if self.lazy_matching: diff --git a/tests/test_cli.py b/tests/test_cli.py index c23917dd..428431e5 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -51,7 +51,7 @@ def test_compress_file_to_stdout(self): test_file.write_bytes(b"foo foo foo") with patch("sys.stdout.buffer.write") as mock_stdout: - app(["compress", str(test_file)], **_app_kwargs) + app(["compress", "--v1", str(test_file)], **_app_kwargs) mock_stdout.assert_called_once_with(compressed_foo_foo_foo) def
test_compress_stdin_to_stdout(self): @@ -59,7 +59,7 @@ def test_compress_stdin_to_stdout(self): patch("sys.stdout.buffer.write") as mock_stdout, patch("sys.stdin.buffer.read", return_value="foo foo foo"), ): - app("compress", **_app_kwargs) + app(["compress", "--v1"], **_app_kwargs) mock_stdout.assert_called_once_with(compressed_foo_foo_foo) def test_decompress_file_to_stdout(self): diff --git a/tests/test_compressor.py b/tests/test_compressor.py index 188447c0..00b0ceac 100644 --- a/tests/test_compressor.py +++ b/tests/test_compressor.py @@ -94,7 +94,7 @@ def test_compressor_default(self): bytes_written = 0 with io.BytesIO() as f: - compressor = Compressor(f) + compressor = Compressor(f, v2=False) bytes_written += compressor.write(test_string) bytes_written += compressor.flush(write_token=False) @@ -106,7 +106,7 @@ def test_compressor_default(self): # Test Context Manager bytes_written = 0 - with io.BytesIO() as f, Compressor(f) as compressor: + with io.BytesIO() as f, Compressor(f, v2=False) as compressor: bytes_written += compressor.write(test_string) bytes_written += compressor.flush(write_token=False) @@ -137,7 +137,7 @@ def test_compressor_input_buffer(self): ) with io.BytesIO() as f: - compressor = Compressor(f) + compressor = Compressor(f, v2=False) compressor.write(b"f") compressor.write(b"oo") compressor.write(b" fo") @@ -171,7 +171,7 @@ def test_compressor_7bit(self): # fmt: on ) with io.BytesIO() as f: - compressor = Compressor(f, literal=7) + compressor = Compressor(f, literal=7, v2=False) compressor.write(test_string) compressor.flush(write_token=False) @@ -200,7 +200,7 @@ def test_compressor_predefined_dictionary(self): ) with io.BytesIO() as f: - compressor = Compressor(f, window=8, literal=7, dictionary=dictionary) + compressor = Compressor(f, window=8, literal=7, dictionary=dictionary, v2=False) compressor.write(test_string) compressor.flush(write_token=False) @@ -223,7 +223,7 @@ def test_oob_2_byte_pattern(self): test_string = 
memoryview(test_string_extended)[:3] # b"Q\x00Q" with io.BytesIO() as f: - compressor = Compressor(f) + compressor = Compressor(f, v2=False) compressor.write(test_string) compressor.flush(write_token=False) @@ -245,7 +245,7 @@ def test_oob_2_byte_pattern(self): def test_excess_bits(self): for Compressor in Compressors: with self.subTest(Compressor=Compressor), io.BytesIO() as f: - compressor = Compressor(f, literal=7) + compressor = Compressor(f, literal=7, v2=False) with self.assertRaises((ExcessBitsError, NativeExcessBitsError)): compressor.write(b"\xff") @@ -271,7 +271,7 @@ def test_single_shot_compress_text(self): ] # fmt: on ) - self.assertEqual(compress("foo foo foo"), expected) + self.assertEqual(compress("foo foo foo", v2=False), expected) def test_single_shot_compress_binary(self): for compress in compresses: @@ -293,7 +293,7 @@ def test_single_shot_compress_binary(self): ] # fmt: on ) - self.assertEqual(compress(b"foo foo foo"), expected) + self.assertEqual(compress(b"foo foo foo", v2=False), expected) def test_invalid_conf(self): for Compressor in Compressors: From bd7762dcc863b2fa2440594310a1f8b448f1c12f Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 10:42:05 -0500 Subject: [PATCH 006/109] common.h: add TampConf.v2 attribute and associated macros --- tamp/_c_src/tamp/common.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tamp/_c_src/tamp/common.h b/tamp/_c_src/tamp/common.h index 0b2b8e47..089b7769 100644 --- a/tamp/_c_src/tamp/common.h +++ b/tamp/_c_src/tamp/common.h @@ -68,6 +68,26 @@ extern "C" { #define TAMP_STREAM_WORK_BUFFER_SIZE 32 #endif +/* V2 format support (RLE, extended match). + * Enabled by default. Disable to save code size on minimal builds. + * Separate flags allow decompressor-only or compressor-only v2 support. 
+ */ +#ifndef TAMP_V2_DECOMPRESS +#define TAMP_V2_DECOMPRESS 1 +#endif +#ifndef TAMP_V2_COMPRESS +#define TAMP_V2_COMPRESS 1 +#endif + +/* V2 encoding constants */ +#if TAMP_V2_DECOMPRESS || TAMP_V2_COMPRESS +#define TAMP_RLE_SYMBOL 12 +#define TAMP_EXTENDED_MATCH_SYMBOL 13 +#define TAMP_LEADING_EXTENDED_MATCH_BITS 3 +#define TAMP_LEADING_RLE_BITS 4 +#define TAMP_RLE_MAX_WINDOW 8 +#endif + enum { /* Normal/Recoverable status >= 0 */ TAMP_OK = 0, @@ -93,6 +113,7 @@ typedef struct TampConf { uint16_t window : 4; // number of window bits uint16_t literal : 4; // number of literal bits uint16_t use_custom_dictionary : 1; // Use a custom initialized dictionary. + uint16_t v2 : 1; // v2 format (RLE, extended match). Read from header bit [1]. #if TAMP_LAZY_MATCHING uint16_t lazy_matching : 1; // use Lazy Matching (spend 50-75% more CPU for around 0.5-2.0% better compression.) // only effects compression operations. From 0b208fffae32b53ba3f005ad8207117c7004dd53 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 14:04:54 -0500 Subject: [PATCH 007/109] wip c decompressor --- tamp/_c_src/tamp/decompressor.c | 309 ++++++++++++++++++++++++++++++-- tamp/_c_src/tamp/decompressor.h | 12 ++ 2 files changed, 310 insertions(+), 11 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index a7c0baaa..aa1999e8 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -7,6 +7,23 @@ #define FLUSH 15 +#if TAMP_V2_DECOMPRESS +/* Pending symbol states for v2 decode suspend/resume. + * - 0: No pending operation + * - 12: RLE - pending_ext_huffman holds partial huffman result or 0xFF for fresh/output-resume + * - 13: Extended match - fresh (need window_offset) + * - 14: Extended match - have window_offset in pending_window_offset (need huffman+trailing) + * - 15: Extended match - have window_offset and huffman (need trailing only) + * When skip_bytes > 0, we're resuming after output-full with full decode saved. 
+ */ +#define PENDING_NONE 0 +#define PENDING_RLE 12 +#define PENDING_EXT_FRESH 13 +#define PENDING_EXT_HAVE_OFFSET 14 +#define PENDING_EXT_HAVE_HUFFMAN 15 +#define PARTIAL_STATE_NONE 0xFF +#endif + /** * This array was generated with tools/huffman_jump_table.py * @@ -51,6 +68,197 @@ static inline int8_t huffman_decode(uint32_t *bit_buffer, uint8_t *bit_buffer_po return code & 0xF; } +#if TAMP_V2_DECOMPRESS +/** + * @brief Decode huffman symbol + trailing bits from bit buffer. + * + * Shared helper for RLE and extended match decoding. + * Uses pending_ext_huffman to track partial decode state. + * + * @param d Decompressor state + * @param trailing_bits Number of trailing bits to read (3 or 4) + * @param result Output: (huffman << trailing_bits) + trailing + * @return TAMP_OK on success, TAMP_INPUT_EXHAUSTED if more bits needed + */ +static tamp_res decode_huffman_trailing(TampDecompressor *d, uint8_t trailing_bits, uint16_t *result) { + uint32_t bit_buffer = d->bit_buffer; + uint8_t bit_buffer_pos = d->bit_buffer_pos; + int8_t huffman_value; + + if (d->pending_ext_huffman != PARTIAL_STATE_NONE) { + huffman_value = d->pending_ext_huffman; + } else { + if (TAMP_UNLIKELY(bit_buffer_pos < 1)) return TAMP_INPUT_EXHAUSTED; + huffman_value = huffman_decode(&bit_buffer, &bit_buffer_pos); + } + + if (TAMP_UNLIKELY(bit_buffer_pos < trailing_bits)) { + d->bit_buffer = bit_buffer; + d->bit_buffer_pos = bit_buffer_pos; + d->pending_ext_huffman = huffman_value; + return TAMP_INPUT_EXHAUSTED; + } + + uint8_t trailing = bit_buffer >> (32 - trailing_bits); + bit_buffer <<= trailing_bits; + bit_buffer_pos -= trailing_bits; + + *result = (huffman_value << trailing_bits) + trailing; + + d->bit_buffer = bit_buffer; + d->bit_buffer_pos = bit_buffer_pos; + d->pending_ext_huffman = PARTIAL_STATE_NONE; + + return TAMP_OK; +} + +/** + * @brief Decode RLE token and write repeated bytes to output. 
+ */ +static tamp_res decode_rle(TampDecompressor *d, unsigned char **output, const unsigned char *output_end, + size_t *output_written_size, uint16_t window_mask) { + uint16_t rle_count; + uint16_t skip = d->skip_bytes; + + if (skip > 0) { + rle_count = d->pending_window_offset; + } else { + uint16_t raw; + tamp_res res = decode_huffman_trailing(d, TAMP_LEADING_RLE_BITS, &raw); + if (res != TAMP_OK) return res; + rle_count = raw + 2; + } + + /* Get the byte to repeat (last written byte) */ + uint16_t prev_pos = (d->window_pos == 0) ? window_mask : (d->window_pos - 1); + uint8_t symbol = d->window[prev_pos]; + + /* Calculate how many to write this call */ + uint16_t remaining_count = rle_count - skip; + size_t output_space = output_end - *output; + uint16_t to_write; + + if (TAMP_UNLIKELY(remaining_count > output_space)) { + /* Partial write */ + to_write = output_space; + d->skip_bytes = skip + output_space; + d->pending_symbol = PENDING_RLE; + d->pending_window_offset = rle_count; + } else { + /* Complete write */ + to_write = remaining_count; + d->skip_bytes = 0; + d->pending_symbol = PENDING_NONE; + } + + /* Write repeated bytes to output */ + for (uint16_t i = 0; i < to_write; i++) { + *(*output)++ = symbol; + } + *output_written_size += to_write; + + /* Update window only on first chunk (skip==0) and not after another RLE */ + if (skip == 0 && !d->rle_last_written) { + uint16_t window_write = (rle_count < TAMP_RLE_MAX_WINDOW) ? rle_count : TAMP_RLE_MAX_WINDOW; + for (uint16_t i = 0; i < window_write; i++) { + d->window[d->window_pos] = symbol; + d->window_pos = (d->window_pos + 1) & window_mask; + } + } + d->rle_last_written = 1; + + return (d->pending_symbol == PENDING_NONE) ? TAMP_OK : TAMP_OUTPUT_FULL; +} + +/** + * @brief Decode extended match token and copy from window to output. 
+ */ +static tamp_res decode_extended_match(TampDecompressor *d, unsigned char **output, const unsigned char *output_end, + size_t *output_written_size, uint8_t conf_window, uint8_t min_pattern_size, + uint16_t window_mask) { + uint16_t window_offset; + uint16_t match_size; + uint16_t skip = d->skip_bytes; + uint8_t pending = d->pending_symbol; + + if (skip > 0) { + /* Resume from output-full: window_offset and match_size already saved */ + window_offset = d->pending_window_offset; + match_size = d->pending_ext_huffman; + } else { + /* Step 1: Get window_offset (saved or decode fresh) */ + if (pending >= PENDING_EXT_HAVE_OFFSET) { + window_offset = d->pending_window_offset; + } else { + if (TAMP_UNLIKELY(d->bit_buffer_pos < conf_window)) return TAMP_INPUT_EXHAUSTED; + window_offset = d->bit_buffer >> (32 - conf_window); + d->bit_buffer <<= conf_window; + d->bit_buffer_pos -= conf_window; + /* Save window_offset in case huffman+trailing needs more input */ + d->pending_window_offset = window_offset; + } + + /* Step 2: Decode huffman + trailing bits */ + uint16_t raw; + tamp_res res = decode_huffman_trailing(d, TAMP_LEADING_EXTENDED_MATCH_BITS, &raw); + if (res != TAMP_OK) { + /* Update pending_symbol based on where we stopped */ + d->pending_symbol = + (d->pending_ext_huffman != PARTIAL_STATE_NONE) ? 
PENDING_EXT_HAVE_HUFFMAN : PENDING_EXT_HAVE_OFFSET; + return res; + } + + match_size = raw + min_pattern_size + 12; + } + + /* Security check: validate window bounds */ + const uint32_t window_size = (1u << conf_window); + if (TAMP_UNLIKELY((uint32_t)window_offset >= window_size || + (uint32_t)window_offset + (uint32_t)match_size > window_size)) { + return TAMP_OOB; + } + + /* Calculate how many to write this call */ + uint16_t remaining_count = match_size - skip; + size_t output_space = output_end - *output; + uint16_t to_write; + + if (TAMP_UNLIKELY(remaining_count > output_space)) { + /* Partial write */ + to_write = output_space; + d->skip_bytes = skip + output_space; + d->pending_symbol = PENDING_EXT_FRESH; + d->pending_window_offset = window_offset; + d->pending_ext_huffman = match_size; + } else { + /* Complete write */ + to_write = remaining_count; + d->skip_bytes = 0; + d->pending_symbol = PENDING_NONE; + } + + /* Copy from window to output */ + uint16_t src_offset = window_offset + skip; + for (uint16_t i = 0; i < to_write; i++) { + *(*output)++ = d->window[src_offset + i]; + } + *output_written_size += to_write; + + /* Update window only on complete decode */ + if (d->pending_symbol == PENDING_NONE) { + uint16_t wp = d->window_pos; + for (uint16_t i = 0; i < match_size; i++) { + d->window[wp] = d->window[(window_offset + i) & window_mask]; + wp = (wp + 1) & window_mask; + } + d->window_pos = wp; + d->rle_last_written = 0; + } + + return (d->pending_symbol == PENDING_NONE) ? TAMP_OK : TAMP_OUTPUT_FULL; +} +#endif /* TAMP_V2_DECOMPRESS */ + /** * @brief Copy pattern from window to window, updating window_pos. 
* @@ -84,13 +292,13 @@ tamp_res tamp_decompressor_read_header(TampConf *conf, const unsigned char *inpu size_t *input_consumed_size) { if (input_consumed_size) (*input_consumed_size) = 0; if (input_size == 0) return TAMP_INPUT_EXHAUSTED; - if (input[0] & 0x2) return TAMP_INVALID_CONF; // Reserved if (input[0] & 0x1) return TAMP_INVALID_CONF; // Currently only a single header byte is supported. if (input_consumed_size) (*input_consumed_size)++; conf->window = ((input[0] >> 5) & 0x7) + 8; conf->literal = ((input[0] >> 3) & 0x3) + 5; conf->use_custom_dictionary = ((input[0] >> 2) & 0x1); + conf->v2 = ((input[0] >> 1) & 0x1); return TAMP_OK; } @@ -101,7 +309,8 @@ tamp_res tamp_decompressor_read_header(TampConf *conf, const unsigned char *inpu * * window_bits_max */ static tamp_res tamp_decompressor_populate_from_conf(TampDecompressor *decompressor, uint8_t conf_window, - uint8_t conf_literal, uint8_t conf_use_custom_dictionary) { + uint8_t conf_literal, uint8_t conf_use_custom_dictionary, + uint8_t conf_v2) { if (conf_window < 8 || conf_window > 15) return TAMP_INVALID_CONF; if (conf_literal < 5 || conf_literal > 8) return TAMP_INVALID_CONF; if (conf_window > decompressor->window_bits_max) return TAMP_INVALID_CONF; @@ -111,6 +320,11 @@ static tamp_res tamp_decompressor_populate_from_conf(TampDecompressor *decompres decompressor->conf_literal = conf_literal; decompressor->min_pattern_size = tamp_compute_min_pattern_size(conf_window, conf_literal); decompressor->configured = true; +#if TAMP_V2_DECOMPRESS + decompressor->conf_v2 = conf_v2; +#else + (void)conf_v2; +#endif return TAMP_OK; } @@ -128,7 +342,7 @@ tamp_res tamp_decompressor_init(TampDecompressor *decompressor, const TampConf * decompressor->window_bits_max = window_bits; if (conf) { res = tamp_decompressor_populate_from_conf(decompressor, conf->window, conf->literal, - conf->use_custom_dictionary); + conf->use_custom_dictionary, conf->v2); } return res; @@ -156,7 +370,8 @@ tamp_res 
tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne res = tamp_decompressor_read_header(&conf, input, input_end - input, &header_consumed_size); if (res != TAMP_OK) return res; - res = tamp_decompressor_populate_from_conf(decompressor, conf.window, conf.literal, conf.use_custom_dictionary); + res = tamp_decompressor_populate_from_conf(decompressor, conf.window, conf.literal, conf.use_custom_dictionary, + conf.v2); if (res != TAMP_OK) return res; input += header_consumed_size; @@ -169,15 +384,48 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne const uint8_t min_pattern_size = decompressor->min_pattern_size; const uint16_t window_mask = (1 << conf_window) - 1; - while (input != input_end || decompressor->bit_buffer_pos) { +#if TAMP_V2_DECOMPRESS + const bool v2_enabled = decompressor->conf_v2; +#endif + +/* Macro to refill bit buffer from input. Used before returning TAMP_INPUT_EXHAUSTED + * to ensure we consume all available input first. */ +#define REFILL() \ + while (input != input_end && decompressor->bit_buffer_pos <= 24) { \ + decompressor->bit_buffer_pos += 8; \ + decompressor->bit_buffer |= (uint32_t) * input << (32 - decompressor->bit_buffer_pos); \ + input++; \ + (*input_consumed_size)++; \ + } + + while (input != input_end || decompressor->bit_buffer_pos +#if TAMP_V2_DECOMPRESS + || decompressor->pending_symbol +#endif + ) { // Populate the bit buffer - while (input != input_end && decompressor->bit_buffer_pos <= 24) { - uint32_t t = *input; - decompressor->bit_buffer_pos += 8; - decompressor->bit_buffer |= t << (32 - decompressor->bit_buffer_pos); - input++; - (*input_consumed_size)++; + REFILL(); + +#if TAMP_V2_DECOMPRESS + /* Resume pending v2 operation. Retry after refill if helper needs more bits. 
*/ + if (TAMP_UNLIKELY(decompressor->pending_symbol)) { + if (TAMP_UNLIKELY(output == output_end)) return TAMP_OUTPUT_FULL; + tamp_res v2_res; + if (decompressor->pending_symbol == PENDING_RLE) { + v2_res = decode_rle(decompressor, &output, output_end, output_written_size, window_mask); + } else { + v2_res = decode_extended_match(decompressor, &output, output_end, output_written_size, conf_window, + min_pattern_size, window_mask); + } + if (v2_res == TAMP_INPUT_EXHAUSTED) { + REFILL(); + if (input == input_end) return TAMP_INPUT_EXHAUSTED; + continue; /* Retry with refilled buffer */ + } + if (v2_res != TAMP_OK) return v2_res; + continue; } +#endif if (TAMP_UNLIKELY(decompressor->bit_buffer_pos == 0)) return TAMP_INPUT_EXHAUSTED; @@ -200,6 +448,9 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne output++; (*output_written_size)++; +#if TAMP_V2_DECOMPRESS + decompressor->rle_last_written = 0; +#endif } else { // is token; attempt a decode /* copy the bit buffers so that we can abort at any time */ @@ -225,6 +476,39 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne bit_buffer_pos & ~7; // Round bit_buffer_pos down to nearest multiple of 8. continue; } + +#if TAMP_V2_DECOMPRESS + /* Check for v2 symbols */ + if (TAMP_UNLIKELY(v2_enabled && match_size >= TAMP_RLE_SYMBOL)) { + /* Commit bit buffer and set pending symbol before calling helper. + * Initialize partial state fields to indicate fresh decode. 
*/ + decompressor->bit_buffer = bit_buffer; + decompressor->bit_buffer_pos = bit_buffer_pos; + decompressor->pending_window_offset = 0; + decompressor->pending_ext_huffman = PARTIAL_STATE_NONE; + + tamp_res v2_res; + if (match_size == TAMP_RLE_SYMBOL) { + decompressor->pending_symbol = PENDING_RLE; + v2_res = decode_rle(decompressor, &output, output_end, output_written_size, window_mask); + } else if (match_size == TAMP_EXTENDED_MATCH_SYMBOL) { + decompressor->pending_symbol = PENDING_EXT_FRESH; + v2_res = decode_extended_match(decompressor, &output, output_end, output_written_size, conf_window, + min_pattern_size, window_mask); + } else { + return TAMP_ERROR; /* Invalid v2 symbol */ + } + /* On success, helper clears pending_symbol; on error, it stays set for resume */ + if (v2_res == TAMP_INPUT_EXHAUSTED) { + REFILL(); + if (input == input_end) return TAMP_INPUT_EXHAUSTED; + continue; /* Retry with refilled buffer */ + } + if (v2_res != TAMP_OK) return v2_res; + continue; + } +#endif + if (TAMP_UNLIKELY(bit_buffer_pos < conf_window)) { // There are not enough bits to decode window offset return TAMP_INPUT_EXHAUSTED; @@ -270,6 +554,9 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne uint16_t wp = decompressor->window_pos; window_copy(decompressor->window, &wp, window_offset, match_size, window_mask); decompressor->window_pos = wp; +#if TAMP_V2_DECOMPRESS + decompressor->rle_last_written = 0; +#endif } } if (TAMP_UNLIKELY(callback && (res = callback(user_data, *output_written_size, input_size)))) diff --git a/tamp/_c_src/tamp/decompressor.h b/tamp/_c_src/tamp/decompressor.h index 1608a6d5..a7c8a58e 100644 --- a/tamp/_c_src/tamp/decompressor.h +++ b/tamp/_c_src/tamp/decompressor.h @@ -17,17 +17,29 @@ typedef struct { uint32_t bit_buffer; // Bit buffer for reading compressed data (32 bits) uint16_t window_pos; // Current position in window (15 bits) uint8_t bit_buffer_pos; // Bits currently in bit_buffer (6 bits) +#if 
TAMP_V2_DECOMPRESS + uint8_t pending_symbol; // State machine: 0=none, 12=RLE, 13=ext match need window, + // 14=ext match need length, 15=ext match need raw bits + uint16_t pending_window_offset; // Saved window_offset for extended match resume + uint8_t pending_ext_huffman; // Saved ext_huffman for extended match resume (state 15) +#endif /* WARM: read once at start of decompress, cached in locals */ uint8_t conf_window : 4; // Window bits from config uint8_t conf_literal : 4; // Literal bits from config uint8_t min_pattern_size : 2; // Minimum pattern size, 2 or 3 +#if TAMP_V2_DECOMPRESS + uint8_t conf_v2 : 1; // v2 format enabled (from header) +#endif /* COLD: rarely accessed (init or edge cases). * Bitfields save space; add new cold fields here. */ uint8_t skip_bytes : 4; // For output-buffer-limited resumption uint8_t window_bits_max : 4; // Max window bits buffer can hold uint8_t configured : 1; // Whether config has been set +#if TAMP_V2_DECOMPRESS + uint8_t rle_last_written : 1; // Previous write was RLE (skip window re-write) +#endif } TampDecompressor; /** From 5dbcefbe433a9fe7aebd6f26a4fcf57b4911782a Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 14:33:28 -0500 Subject: [PATCH 008/109] swap window/extended-match length in encoding --- Makefile | 130 ++++++++++++++++++-------------- tamp/_c_src/tamp/decompressor.c | 109 +++++++++++++------------- tamp/_c_src/tamp/decompressor.h | 9 +-- tamp/compressor.py | 3 +- tamp/decompressor.py | 3 +- 5 files changed, 135 insertions(+), 119 deletions(-) diff --git a/Makefile b/Makefile index 7b668924..0949b15b 100644 --- a/Makefile +++ b/Makefile @@ -182,7 +182,8 @@ build/enwik8-100kb: download-enwik8 @head -c 100000 datasets/enwik8 > build/enwik8-100kb build/enwik8-100kb.tamp: build/enwik8-100kb - @poetry run tamp compress build/enwik8-100kb -o build/enwik8-100kb.tamp + @# Use Python implementation until C compressor supports v2 + @poetry run tamp compress --implementation=python build/enwik8-100kb 
-o build/enwik8-100kb.tamp download-micropython: mkdir -p datasets @@ -490,7 +491,7 @@ tamp-c-library: build/tamp.a # Binary Sizes ############### # Generate binary size information for README table (armv6m with -O3). -.PHONY: binary-size c-size +.PHONY: binary-size c-size c-size-v1 c-size-v2 ARM_CC := arm-none-eabi-gcc ARM_AR := arm-none-eabi-ar @@ -501,61 +502,76 @@ C_SRC_COMMON = tamp/_c_src/tamp/common.c C_SRC_COMP = tamp/_c_src/tamp/compressor.c C_SRC_DECOMP = tamp/_c_src/tamp/decompressor.c -# Build compressor-only library (without stream API) -build/arm/tamp_comp.a: $(C_SRC_COMMON) $(C_SRC_COMP) - @mkdir -p build/arm - $(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=0 -DTAMP_STREAM=0 -c $(C_SRC_COMMON) -o build/arm/common_c.o - $(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=0 -DTAMP_STREAM=0 -c $(C_SRC_COMP) -o build/arm/compressor.o - $(ARM_AR) rcs $@ build/arm/common_c.o build/arm/compressor.o - -# Build decompressor-only library (without stream API) -build/arm/tamp_decomp.a: $(C_SRC_COMMON) $(C_SRC_DECOMP) - @mkdir -p build/arm - $(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=0 -DTAMP_DECOMPRESSOR=1 -DTAMP_STREAM=0 -c $(C_SRC_COMMON) -o build/arm/common_d.o - $(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=0 -DTAMP_DECOMPRESSOR=1 -DTAMP_STREAM=0 -c $(C_SRC_DECOMP) -o build/arm/decompressor.o - $(ARM_AR) rcs $@ build/arm/common_d.o build/arm/decompressor.o - -# Build full library (without stream API) -build/arm/tamp_full.a: $(C_SRC_COMMON) $(C_SRC_COMP) $(C_SRC_DECOMP) - @mkdir -p build/arm - $(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=1 -DTAMP_STREAM=0 -c $(C_SRC_COMMON) -o build/arm/common_f.o - $(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=1 -DTAMP_STREAM=0 -c $(C_SRC_COMP) -o build/arm/compressor_f.o - $(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=1 -DTAMP_STREAM=0 -c $(C_SRC_DECOMP) -o build/arm/decompressor_f.o - $(ARM_AR) rcs $@ build/arm/common_f.o 
build/arm/compressor_f.o build/arm/decompressor_f.o - -# Build compressor-only library (with stream API, the default) -build/arm/tamp_comp_stream.a: $(C_SRC_COMMON) $(C_SRC_COMP) - @mkdir -p build/arm - $(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=0 -c $(C_SRC_COMMON) -o build/arm/common_cs.o - $(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=0 -c $(C_SRC_COMP) -o build/arm/compressor_s.o - $(ARM_AR) rcs $@ build/arm/common_cs.o build/arm/compressor_s.o - -# Build decompressor-only library (with stream API, the default) -build/arm/tamp_decomp_stream.a: $(C_SRC_COMMON) $(C_SRC_DECOMP) - @mkdir -p build/arm - $(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=0 -DTAMP_DECOMPRESSOR=1 -c $(C_SRC_COMMON) -o build/arm/common_ds.o - $(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=0 -DTAMP_DECOMPRESSOR=1 -c $(C_SRC_DECOMP) -o build/arm/decompressor_s.o - $(ARM_AR) rcs $@ build/arm/common_ds.o build/arm/decompressor_s.o - -# Build full library (with stream API, the default) -build/arm/tamp_full_stream.a: $(C_SRC_COMMON) $(C_SRC_COMP) $(C_SRC_DECOMP) - @mkdir -p build/arm - $(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=1 -c $(C_SRC_COMMON) -o build/arm/common_fs.o - $(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=1 -c $(C_SRC_COMP) -o build/arm/compressor_fs.o - $(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=1 -c $(C_SRC_DECOMP) -o build/arm/decompressor_fs.o - $(ARM_AR) rcs $@ build/arm/common_fs.o build/arm/compressor_fs.o build/arm/decompressor_fs.o - -c-size: - @rm -rf build/arm - @$(MAKE) --no-print-directory build/arm/tamp_comp_stream.a build/arm/tamp_decomp_stream.a build/arm/tamp_full_stream.a build/arm/tamp_comp.a build/arm/tamp_decomp.a build/arm/tamp_full.a - @size_comp=$$($(ARM_SIZE) -B --totals build/arm/tamp_comp.a 2>/dev/null | grep TOTALS | awk '{print $$1+$$2}'); \ - size_decomp=$$($(ARM_SIZE) -B --totals build/arm/tamp_decomp.a 2>/dev/null | grep TOTALS | awk '{print $$1+$$2}'); \ - 
size_full=$$($(ARM_SIZE) -B --totals build/arm/tamp_full.a 2>/dev/null | grep TOTALS | awk '{print $$1+$$2}'); \ - printf 'Tamp (C, -DTAMP_STREAM=0) %d %d %d\n' $$size_comp $$size_decomp $$size_full - @size_comp=$$($(ARM_SIZE) -B --totals build/arm/tamp_comp_stream.a 2>/dev/null | grep TOTALS | awk '{print $$1+$$2}'); \ - size_decomp=$$($(ARM_SIZE) -B --totals build/arm/tamp_decomp_stream.a 2>/dev/null | grep TOTALS | awk '{print $$1+$$2}'); \ - size_full=$$($(ARM_SIZE) -B --totals build/arm/tamp_full_stream.a 2>/dev/null | grep TOTALS | awk '{print $$1+$$2}'); \ - printf 'Tamp (C) %d %d %d\n' $$size_comp $$size_decomp $$size_full +# V2 flags to disable v2 support +V1_FLAGS = -DTAMP_V2_COMPRESS=0 -DTAMP_V2_DECOMPRESS=0 + +c-size-v1: + @rm -rf build/arm && mkdir -p build/arm + @# v1 without stream API + @$(ARM_CC) $(ARM_CFLAGS) $(V1_FLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=0 -DTAMP_STREAM=0 -c $(C_SRC_COMMON) -o build/arm/common.o + @$(ARM_CC) $(ARM_CFLAGS) $(V1_FLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=0 -DTAMP_STREAM=0 -c $(C_SRC_COMP) -o build/arm/compressor.o + @$(ARM_AR) rcs build/arm/v1_comp.a build/arm/common.o build/arm/compressor.o + @$(ARM_CC) $(ARM_CFLAGS) $(V1_FLAGS) -DTAMP_COMPRESSOR=0 -DTAMP_DECOMPRESSOR=1 -DTAMP_STREAM=0 -c $(C_SRC_COMMON) -o build/arm/common.o + @$(ARM_CC) $(ARM_CFLAGS) $(V1_FLAGS) -DTAMP_COMPRESSOR=0 -DTAMP_DECOMPRESSOR=1 -DTAMP_STREAM=0 -c $(C_SRC_DECOMP) -o build/arm/decompressor.o + @$(ARM_AR) rcs build/arm/v1_decomp.a build/arm/common.o build/arm/decompressor.o + @$(ARM_CC) $(ARM_CFLAGS) $(V1_FLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=1 -DTAMP_STREAM=0 -c $(C_SRC_COMMON) -o build/arm/common.o + @$(ARM_CC) $(ARM_CFLAGS) $(V1_FLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=1 -DTAMP_STREAM=0 -c $(C_SRC_COMP) -o build/arm/compressor.o + @$(ARM_CC) $(ARM_CFLAGS) $(V1_FLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=1 -DTAMP_STREAM=0 -c $(C_SRC_DECOMP) -o build/arm/decompressor.o + @$(ARM_AR) rcs build/arm/v1_full.a 
build/arm/common.o build/arm/compressor.o build/arm/decompressor.o + @# v1 with stream API + @$(ARM_CC) $(ARM_CFLAGS) $(V1_FLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=0 -c $(C_SRC_COMMON) -o build/arm/common.o + @$(ARM_CC) $(ARM_CFLAGS) $(V1_FLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=0 -c $(C_SRC_COMP) -o build/arm/compressor.o + @$(ARM_AR) rcs build/arm/v1_comp_s.a build/arm/common.o build/arm/compressor.o + @$(ARM_CC) $(ARM_CFLAGS) $(V1_FLAGS) -DTAMP_COMPRESSOR=0 -DTAMP_DECOMPRESSOR=1 -c $(C_SRC_COMMON) -o build/arm/common.o + @$(ARM_CC) $(ARM_CFLAGS) $(V1_FLAGS) -DTAMP_COMPRESSOR=0 -DTAMP_DECOMPRESSOR=1 -c $(C_SRC_DECOMP) -o build/arm/decompressor.o + @$(ARM_AR) rcs build/arm/v1_decomp_s.a build/arm/common.o build/arm/decompressor.o + @$(ARM_CC) $(ARM_CFLAGS) $(V1_FLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=1 -c $(C_SRC_COMMON) -o build/arm/common.o + @$(ARM_CC) $(ARM_CFLAGS) $(V1_FLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=1 -c $(C_SRC_COMP) -o build/arm/compressor.o + @$(ARM_CC) $(ARM_CFLAGS) $(V1_FLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=1 -c $(C_SRC_DECOMP) -o build/arm/decompressor.o + @$(ARM_AR) rcs build/arm/v1_full_s.a build/arm/common.o build/arm/compressor.o build/arm/decompressor.o + @size_comp=$$($(ARM_SIZE) -B --totals build/arm/v1_comp.a | grep TOTALS | awk '{print $$1+$$2}'); \ + size_decomp=$$($(ARM_SIZE) -B --totals build/arm/v1_decomp.a | grep TOTALS | awk '{print $$1+$$2}'); \ + size_full=$$($(ARM_SIZE) -B --totals build/arm/v1_full.a | grep TOTALS | awk '{print $$1+$$2}'); \ + printf 'Tamp v1 (C, no stream) %d %d %d\n' $$size_comp $$size_decomp $$size_full + @size_comp=$$($(ARM_SIZE) -B --totals build/arm/v1_comp_s.a | grep TOTALS | awk '{print $$1+$$2}'); \ + size_decomp=$$($(ARM_SIZE) -B --totals build/arm/v1_decomp_s.a | grep TOTALS | awk '{print $$1+$$2}'); \ + size_full=$$($(ARM_SIZE) -B --totals build/arm/v1_full_s.a | grep TOTALS | awk '{print $$1+$$2}'); \ + printf 'Tamp v1 (C) %d %d %d\n' $$size_comp $$size_decomp 
$$size_full + +c-size-v2: + @rm -rf build/arm && mkdir -p build/arm + @# v2 without stream API + @$(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=0 -DTAMP_STREAM=0 -c $(C_SRC_COMMON) -o build/arm/common.o + @$(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=0 -DTAMP_STREAM=0 -c $(C_SRC_COMP) -o build/arm/compressor.o + @$(ARM_AR) rcs build/arm/v2_comp.a build/arm/common.o build/arm/compressor.o + @$(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=0 -DTAMP_DECOMPRESSOR=1 -DTAMP_STREAM=0 -c $(C_SRC_COMMON) -o build/arm/common.o + @$(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=0 -DTAMP_DECOMPRESSOR=1 -DTAMP_STREAM=0 -c $(C_SRC_DECOMP) -o build/arm/decompressor.o + @$(ARM_AR) rcs build/arm/v2_decomp.a build/arm/common.o build/arm/decompressor.o + @$(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=1 -DTAMP_STREAM=0 -c $(C_SRC_COMMON) -o build/arm/common.o + @$(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=1 -DTAMP_STREAM=0 -c $(C_SRC_COMP) -o build/arm/compressor.o + @$(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=1 -DTAMP_STREAM=0 -c $(C_SRC_DECOMP) -o build/arm/decompressor.o + @$(ARM_AR) rcs build/arm/v2_full.a build/arm/common.o build/arm/compressor.o build/arm/decompressor.o + @# v2 with stream API + @$(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=0 -c $(C_SRC_COMMON) -o build/arm/common.o + @$(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=0 -c $(C_SRC_COMP) -o build/arm/compressor.o + @$(ARM_AR) rcs build/arm/v2_comp_s.a build/arm/common.o build/arm/compressor.o + @$(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=0 -DTAMP_DECOMPRESSOR=1 -c $(C_SRC_COMMON) -o build/arm/common.o + @$(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=0 -DTAMP_DECOMPRESSOR=1 -c $(C_SRC_DECOMP) -o build/arm/decompressor.o + @$(ARM_AR) rcs build/arm/v2_decomp_s.a build/arm/common.o build/arm/decompressor.o + @$(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=1 -c $(C_SRC_COMMON) -o 
build/arm/common.o + @$(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=1 -c $(C_SRC_COMP) -o build/arm/compressor.o + @$(ARM_CC) $(ARM_CFLAGS) -DTAMP_COMPRESSOR=1 -DTAMP_DECOMPRESSOR=1 -c $(C_SRC_DECOMP) -o build/arm/decompressor.o + @$(ARM_AR) rcs build/arm/v2_full_s.a build/arm/common.o build/arm/compressor.o build/arm/decompressor.o + @size_comp=$$($(ARM_SIZE) -B --totals build/arm/v2_comp.a | grep TOTALS | awk '{print $$1+$$2}'); \ + size_decomp=$$($(ARM_SIZE) -B --totals build/arm/v2_decomp.a | grep TOTALS | awk '{print $$1+$$2}'); \ + size_full=$$($(ARM_SIZE) -B --totals build/arm/v2_full.a | grep TOTALS | awk '{print $$1+$$2}'); \ + printf 'Tamp v2 (C, no stream) %d %d %d\n' $$size_comp $$size_decomp $$size_full + @size_comp=$$($(ARM_SIZE) -B --totals build/arm/v2_comp_s.a | grep TOTALS | awk '{print $$1+$$2}'); \ + size_decomp=$$($(ARM_SIZE) -B --totals build/arm/v2_decomp_s.a | grep TOTALS | awk '{print $$1+$$2}'); \ + size_full=$$($(ARM_SIZE) -B --totals build/arm/v2_full_s.a | grep TOTALS | awk '{print $$1+$$2}'); \ + printf 'Tamp v2 (C) %d %d %d\n' $$size_comp $$size_decomp $$size_full + +c-size: c-size-v1 c-size-v2 binary-size: @echo "Binary sizes for armv6m (bytes):" diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index aa1999e8..690fa554 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -10,18 +10,15 @@ #if TAMP_V2_DECOMPRESS /* Pending symbol states for v2 decode suspend/resume. 
* - 0: No pending operation - * - 12: RLE - pending_ext_huffman holds partial huffman result or 0xFF for fresh/output-resume - * - 13: Extended match - fresh (need window_offset) - * - 14: Extended match - have window_offset in pending_window_offset (need huffman+trailing) - * - 15: Extended match - have window_offset and huffman (need trailing only) + * - 12: RLE (fresh or resume based on skip_bytes) + * - 13: Extended match - have match_size, need window_offset + * - 14: Extended match - fresh decode (need both size and offset) * When skip_bytes > 0, we're resuming after output-full with full decode saved. */ #define PENDING_NONE 0 #define PENDING_RLE 12 -#define PENDING_EXT_FRESH 13 -#define PENDING_EXT_HAVE_OFFSET 14 -#define PENDING_EXT_HAVE_HUFFMAN 15 -#define PARTIAL_STATE_NONE 0xFF +#define PENDING_EXT_NEED_OFFSET 13 +#define PENDING_EXT_FRESH 14 #endif /** @@ -72,8 +69,8 @@ static inline int8_t huffman_decode(uint32_t *bit_buffer, uint8_t *bit_buffer_po /** * @brief Decode huffman symbol + trailing bits from bit buffer. * - * Shared helper for RLE and extended match decoding. - * Uses pending_ext_huffman to track partial decode state. + * Simple helper that decodes from local copies. On failure, decompressor + * state is not modified. Caller is responsible for state management. 
* * @param d Decompressor state * @param trailing_bits Number of trailing bits to read (3 or 4) @@ -83,21 +80,13 @@ static inline int8_t huffman_decode(uint32_t *bit_buffer, uint8_t *bit_buffer_po static tamp_res decode_huffman_trailing(TampDecompressor *d, uint8_t trailing_bits, uint16_t *result) { uint32_t bit_buffer = d->bit_buffer; uint8_t bit_buffer_pos = d->bit_buffer_pos; - int8_t huffman_value; - if (d->pending_ext_huffman != PARTIAL_STATE_NONE) { - huffman_value = d->pending_ext_huffman; - } else { - if (TAMP_UNLIKELY(bit_buffer_pos < 1)) return TAMP_INPUT_EXHAUSTED; - huffman_value = huffman_decode(&bit_buffer, &bit_buffer_pos); - } + /* Need at least 1 bit for huffman, plus trailing bits */ + if (TAMP_UNLIKELY(bit_buffer_pos < 1 + trailing_bits)) return TAMP_INPUT_EXHAUSTED; - if (TAMP_UNLIKELY(bit_buffer_pos < trailing_bits)) { - d->bit_buffer = bit_buffer; - d->bit_buffer_pos = bit_buffer_pos; - d->pending_ext_huffman = huffman_value; - return TAMP_INPUT_EXHAUSTED; - } + int8_t huffman_value = huffman_decode(&bit_buffer, &bit_buffer_pos); + + if (TAMP_UNLIKELY(bit_buffer_pos < trailing_bits)) return TAMP_INPUT_EXHAUSTED; uint8_t trailing = bit_buffer >> (32 - trailing_bits); bit_buffer <<= trailing_bits; @@ -105,15 +94,18 @@ static tamp_res decode_huffman_trailing(TampDecompressor *d, uint8_t trailing_bi *result = (huffman_value << trailing_bits) + trailing; + /* Commit only on success */ d->bit_buffer = bit_buffer; d->bit_buffer_pos = bit_buffer_pos; - d->pending_ext_huffman = PARTIAL_STATE_NONE; return TAMP_OK; } /** * @brief Decode RLE token and write repeated bytes to output. 
+ * + * RLE format: huffman(count_high) + trailing_bits(count_low) + * rle_count = (count_high << 4) + count_low + 2 */ static tamp_res decode_rle(TampDecompressor *d, unsigned char **output, const unsigned char *output_end, size_t *output_written_size, uint16_t window_mask) { @@ -121,8 +113,10 @@ static tamp_res decode_rle(TampDecompressor *d, unsigned char **output, const un uint16_t skip = d->skip_bytes; if (skip > 0) { + /* Resume from output-full: rle_count saved in pending_window_offset */ rle_count = d->pending_window_offset; } else { + /* Fresh decode */ uint16_t raw; tamp_res res = decode_huffman_trailing(d, TAMP_LEADING_RLE_BITS, &raw); if (res != TAMP_OK) return res; @@ -139,7 +133,7 @@ static tamp_res decode_rle(TampDecompressor *d, unsigned char **output, const un uint16_t to_write; if (TAMP_UNLIKELY(remaining_count > output_space)) { - /* Partial write */ + /* Partial write - save state for resume */ to_write = output_space; d->skip_bytes = skip + output_space; d->pending_symbol = PENDING_RLE; @@ -172,6 +166,14 @@ static tamp_res decode_rle(TampDecompressor *d, unsigned char **output, const un /** * @brief Decode extended match token and copy from window to output. 
+ * + * NEW FORMAT: huffman(size_high) + trailing_bits(size_low) + window_offset + * match_size = (size_high << 3) + size_low + min_pattern_size + 12 + * + * State machine: + * - Fresh: decode huffman+trailing, then window_offset + * - PENDING_EXT_NEED_OFFSET: have match_size, need window_offset + * - Output-full resume (skip > 0): have both match_size and window_offset */ static tamp_res decode_extended_match(TampDecompressor *d, unsigned char **output, const unsigned char *output_end, size_t *output_written_size, uint8_t conf_window, uint8_t min_pattern_size, @@ -179,36 +181,36 @@ static tamp_res decode_extended_match(TampDecompressor *d, unsigned char **outpu uint16_t window_offset; uint16_t match_size; uint16_t skip = d->skip_bytes; - uint8_t pending = d->pending_symbol; if (skip > 0) { - /* Resume from output-full: window_offset and match_size already saved */ + /* Resume from output-full: both values saved */ window_offset = d->pending_window_offset; - match_size = d->pending_ext_huffman; + match_size = d->pending_match_size; + } else if (d->pending_symbol == PENDING_EXT_NEED_OFFSET) { + /* Resume: have match_size, need window_offset */ + match_size = d->pending_match_size; + + if (TAMP_UNLIKELY(d->bit_buffer_pos < conf_window)) return TAMP_INPUT_EXHAUSTED; + window_offset = d->bit_buffer >> (32 - conf_window); + d->bit_buffer <<= conf_window; + d->bit_buffer_pos -= conf_window; } else { - /* Step 1: Get window_offset (saved or decode fresh) */ - if (pending >= PENDING_EXT_HAVE_OFFSET) { - window_offset = d->pending_window_offset; - } else { - if (TAMP_UNLIKELY(d->bit_buffer_pos < conf_window)) return TAMP_INPUT_EXHAUSTED; - window_offset = d->bit_buffer >> (32 - conf_window); - d->bit_buffer <<= conf_window; - d->bit_buffer_pos -= conf_window; - /* Save window_offset in case huffman+trailing needs more input */ - d->pending_window_offset = window_offset; - } - - /* Step 2: Decode huffman + trailing bits */ + /* Fresh decode: huffman+trailing first, then 
window_offset */ uint16_t raw; tamp_res res = decode_huffman_trailing(d, TAMP_LEADING_EXTENDED_MATCH_BITS, &raw); - if (res != TAMP_OK) { - /* Update pending_symbol based on where we stopped */ - d->pending_symbol = - (d->pending_ext_huffman != PARTIAL_STATE_NONE) ? PENDING_EXT_HAVE_HUFFMAN : PENDING_EXT_HAVE_OFFSET; - return res; - } - + if (res != TAMP_OK) return res; match_size = raw + min_pattern_size + 12; + + /* Now decode window_offset */ + if (TAMP_UNLIKELY(d->bit_buffer_pos < conf_window)) { + /* Save match_size and return */ + d->pending_symbol = PENDING_EXT_NEED_OFFSET; + d->pending_match_size = match_size; + return TAMP_INPUT_EXHAUSTED; + } + window_offset = d->bit_buffer >> (32 - conf_window); + d->bit_buffer <<= conf_window; + d->bit_buffer_pos -= conf_window; } /* Security check: validate window bounds */ @@ -224,12 +226,12 @@ static tamp_res decode_extended_match(TampDecompressor *d, unsigned char **outpu uint16_t to_write; if (TAMP_UNLIKELY(remaining_count > output_space)) { - /* Partial write */ + /* Partial write - save state for resume */ to_write = output_space; d->skip_bytes = skip + output_space; - d->pending_symbol = PENDING_EXT_FRESH; + d->pending_symbol = PENDING_EXT_NEED_OFFSET; /* Reuse for output-full */ d->pending_window_offset = window_offset; - d->pending_ext_huffman = match_size; + d->pending_match_size = match_size; } else { /* Complete write */ to_write = remaining_count; @@ -480,12 +482,9 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne #if TAMP_V2_DECOMPRESS /* Check for v2 symbols */ if (TAMP_UNLIKELY(v2_enabled && match_size >= TAMP_RLE_SYMBOL)) { - /* Commit bit buffer and set pending symbol before calling helper. - * Initialize partial state fields to indicate fresh decode. */ + /* Commit bit buffer before calling helper. 
*/ decompressor->bit_buffer = bit_buffer; decompressor->bit_buffer_pos = bit_buffer_pos; - decompressor->pending_window_offset = 0; - decompressor->pending_ext_huffman = PARTIAL_STATE_NONE; tamp_res v2_res; if (match_size == TAMP_RLE_SYMBOL) { diff --git a/tamp/_c_src/tamp/decompressor.h b/tamp/_c_src/tamp/decompressor.h index a7c8a58e..df78c3d9 100644 --- a/tamp/_c_src/tamp/decompressor.h +++ b/tamp/_c_src/tamp/decompressor.h @@ -18,10 +18,9 @@ typedef struct { uint16_t window_pos; // Current position in window (15 bits) uint8_t bit_buffer_pos; // Bits currently in bit_buffer (6 bits) #if TAMP_V2_DECOMPRESS - uint8_t pending_symbol; // State machine: 0=none, 12=RLE, 13=ext match need window, - // 14=ext match need length, 15=ext match need raw bits - uint16_t pending_window_offset; // Saved window_offset for extended match resume - uint8_t pending_ext_huffman; // Saved ext_huffman for extended match resume (state 15) + uint8_t pending_symbol; // State machine: 0=none, 12=RLE, 13=ext need offset, 14=ext fresh + uint16_t pending_window_offset; // Saved window_offset for extended match output-full resume + uint16_t pending_match_size; // Saved match_size for extended match resume #endif /* WARM: read once at start of decompress, cached in locals */ @@ -34,7 +33,7 @@ typedef struct { /* COLD: rarely accessed (init or edge cases). * Bitfields save space; add new cold fields here. 
*/ - uint8_t skip_bytes : 4; // For output-buffer-limited resumption + uint8_t skip_bytes; // For output-buffer-limited resumption (v2 needs >4 bits) uint8_t window_bits_max : 4; // Max window bits buffer can hold uint8_t configured : 1; // Whether config has been set #if TAMP_V2_DECOMPRESS diff --git a/tamp/compressor.py b/tamp/compressor.py index 7b4b2a79..906477cc 100644 --- a/tamp/compressor.py +++ b/tamp/compressor.py @@ -413,12 +413,13 @@ def _write_extended_match(self): self.extended_match_cb( self._window_buffer.pos, self._extended_match_position, self._extended_match_count, string ) + # Format: symbol, size (huffman+trailing), position bytes_written += self._bit_writer.write_huffman_and_literal_flag(_EXTENDED_MATCH_SYMBOL) - bytes_written += self._bit_writer.write(self._extended_match_position, self.window_bits) bytes_written += self._write_extended_huffman( self._extended_match_count - self.min_pattern_size - 11 - 1, _LEADING_EXTENDED_MATCH_HUFFMAN_BITS, ) + bytes_written += self._bit_writer.write(self._extended_match_position, self.window_bits) self._window_buffer.write_from_self(self._extended_match_position, self._extended_match_count) diff --git a/tamp/decompressor.py b/tamp/decompressor.py index 947dd023..e095489e 100644 --- a/tamp/decompressor.py +++ b/tamp/decompressor.py @@ -260,11 +260,12 @@ def write_to_output(string): self._window_buffer.write_bytes(string[: min(rle_count, _RLE_MAX_WINDOW)]) self._rle_last_written = True elif match_size == _EXTENDED_MATCH_SYMBOL: - index = self._bit_reader.read(self.window_bits) + # Format: size (huffman+trailing), then position match_size = self._bit_reader.read_huffman() match_size <<= _LEADING_EXTENDED_MATCH_HUFFMAN_BITS match_size += self._bit_reader.read(_LEADING_EXTENDED_MATCH_HUFFMAN_BITS) match_size += self.min_pattern_size + 11 + 1 + index = self._bit_reader.read(self.window_bits) string = self._window_buffer.get(index, match_size) From b377d18fcfa4b5d891b5121d34d8759b8f68c03f Mon Sep 17 00:00:00 2001 
From: Brian Pugh Date: Sat, 24 Jan 2026 14:47:40 -0500 Subject: [PATCH 009/109] more cleanup --- tamp/_c_src/tamp/decompressor.c | 39 +++++++++++++++++---------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index 690fa554..39201c22 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -350,6 +350,21 @@ tamp_res tamp_decompressor_init(TampDecompressor *decompressor, const TampConf * return res; } +/** + * @brief Refill bit buffer from input stream. + * + * Consumes bytes from input until bit_buffer has at least 25 bits or input is exhausted. + */ +static inline void refill_bit_buffer(TampDecompressor *d, const unsigned char **input, const unsigned char *input_end, + size_t *input_consumed_size) { + while (*input != input_end && d->bit_buffer_pos <= 24) { + d->bit_buffer_pos += 8; + d->bit_buffer |= (uint32_t) * (*input) << (32 - d->bit_buffer_pos); + (*input)++; + (*input_consumed_size)++; + } +} + tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigned char *output, size_t output_size, size_t *output_written_size, const unsigned char *input, size_t input_size, size_t *input_consumed_size, tamp_callback_t callback, void *user_data) { @@ -390,23 +405,13 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne const bool v2_enabled = decompressor->conf_v2; #endif -/* Macro to refill bit buffer from input. Used before returning TAMP_INPUT_EXHAUSTED - * to ensure we consume all available input first. 
*/ -#define REFILL() \ - while (input != input_end && decompressor->bit_buffer_pos <= 24) { \ - decompressor->bit_buffer_pos += 8; \ - decompressor->bit_buffer |= (uint32_t) * input << (32 - decompressor->bit_buffer_pos); \ - input++; \ - (*input_consumed_size)++; \ - } - while (input != input_end || decompressor->bit_buffer_pos #if TAMP_V2_DECOMPRESS || decompressor->pending_symbol #endif ) { // Populate the bit buffer - REFILL(); + refill_bit_buffer(decompressor, &input, input_end, input_consumed_size); #if TAMP_V2_DECOMPRESS /* Resume pending v2 operation. Retry after refill if helper needs more bits. */ @@ -420,7 +425,7 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne min_pattern_size, window_mask); } if (v2_res == TAMP_INPUT_EXHAUSTED) { - REFILL(); + refill_bit_buffer(decompressor, &input, input_end, input_consumed_size); if (input == input_end) return TAMP_INPUT_EXHAUSTED; continue; /* Retry with refilled buffer */ } @@ -497,13 +502,9 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne } else { return TAMP_ERROR; /* Invalid v2 symbol */ } - /* On success, helper clears pending_symbol; on error, it stays set for resume */ - if (v2_res == TAMP_INPUT_EXHAUSTED) { - REFILL(); - if (input == input_end) return TAMP_INPUT_EXHAUSTED; - continue; /* Retry with refilled buffer */ - } - if (v2_res != TAMP_OK) return v2_res; + /* On success, helper clears pending_symbol; on error, it stays set for resume. + * TAMP_INPUT_EXHAUSTED is handled by resume path on next iteration. 
*/ + if (v2_res == TAMP_OUTPUT_FULL || v2_res < TAMP_OK) return v2_res; continue; } #endif From 667e3cfc0fd1f7353218a8f42312701195a0cf24 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 18:35:04 -0500 Subject: [PATCH 010/109] remove rle_last_written check; provides very small benefits, but unnecessarily bloats c-decompressor --- tamp/_c_src/tamp/decompressor.c | 32 +++++++++++--------------------- tamp/_c_src/tamp/decompressor.h | 5 +---- tamp/compressor.py | 16 ++++------------ tamp/decompressor.py | 10 +++------- 4 files changed, 19 insertions(+), 44 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index 39201c22..f370cd8e 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -8,17 +8,13 @@ #define FLUSH 15 #if TAMP_V2_DECOMPRESS -/* Pending symbol states for v2 decode suspend/resume. - * - 0: No pending operation - * - 12: RLE (fresh or resume based on skip_bytes) - * - 13: Extended match - have match_size, need window_offset - * - 14: Extended match - fresh decode (need both size and offset) +/* Pending symbol states for v2 decode suspend/resume (2 bits). * When skip_bytes > 0, we're resuming after output-full with full decode saved. */ #define PENDING_NONE 0 -#define PENDING_RLE 12 -#define PENDING_EXT_NEED_OFFSET 13 -#define PENDING_EXT_FRESH 14 +#define PENDING_RLE 1 +#define PENDING_EXT_NEED_OFFSET 2 +#define PENDING_EXT_FRESH 3 #endif /** @@ -151,15 +147,16 @@ static tamp_res decode_rle(TampDecompressor *d, unsigned char **output, const un } *output_written_size += to_write; - /* Update window only on first chunk (skip==0) and not after another RLE */ - if (skip == 0 && !d->rle_last_written) { - uint16_t window_write = (rle_count < TAMP_RLE_MAX_WINDOW) ? rle_count : TAMP_RLE_MAX_WINDOW; + /* Update window only on first chunk (skip==0). + * Write up to TAMP_RLE_MAX_WINDOW or until end of buffer (no wrap). 
*/ + if (skip == 0) { + uint16_t remaining = (window_mask + 1) - d->window_pos; + uint16_t window_write = MIN(MIN(rle_count, TAMP_RLE_MAX_WINDOW), remaining); for (uint16_t i = 0; i < window_write; i++) { - d->window[d->window_pos] = symbol; - d->window_pos = (d->window_pos + 1) & window_mask; + d->window[d->window_pos++] = symbol; } + d->window_pos &= window_mask; } - d->rle_last_written = 1; return (d->pending_symbol == PENDING_NONE) ? TAMP_OK : TAMP_OUTPUT_FULL; } @@ -254,7 +251,6 @@ static tamp_res decode_extended_match(TampDecompressor *d, unsigned char **outpu wp = (wp + 1) & window_mask; } d->window_pos = wp; - d->rle_last_written = 0; } return (d->pending_symbol == PENDING_NONE) ? TAMP_OK : TAMP_OUTPUT_FULL; @@ -455,9 +451,6 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne output++; (*output_written_size)++; -#if TAMP_V2_DECOMPRESS - decompressor->rle_last_written = 0; -#endif } else { // is token; attempt a decode /* copy the bit buffers so that we can abort at any time */ @@ -554,9 +547,6 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne uint16_t wp = decompressor->window_pos; window_copy(decompressor->window, &wp, window_offset, match_size, window_mask); decompressor->window_pos = wp; -#if TAMP_V2_DECOMPRESS - decompressor->rle_last_written = 0; -#endif } } if (TAMP_UNLIKELY(callback && (res = callback(user_data, *output_written_size, input_size)))) diff --git a/tamp/_c_src/tamp/decompressor.h b/tamp/_c_src/tamp/decompressor.h index df78c3d9..8a3ff207 100644 --- a/tamp/_c_src/tamp/decompressor.h +++ b/tamp/_c_src/tamp/decompressor.h @@ -18,7 +18,7 @@ typedef struct { uint16_t window_pos; // Current position in window (15 bits) uint8_t bit_buffer_pos; // Bits currently in bit_buffer (6 bits) #if TAMP_V2_DECOMPRESS - uint8_t pending_symbol; // State machine: 0=none, 12=RLE, 13=ext need offset, 14=ext fresh + uint8_t pending_symbol : 2; // State machine: 0=none, 1=RLE, 2=ext need offset, 
3=ext fresh uint16_t pending_window_offset; // Saved window_offset for extended match output-full resume uint16_t pending_match_size; // Saved match_size for extended match resume #endif @@ -36,9 +36,6 @@ typedef struct { uint8_t skip_bytes; // For output-buffer-limited resumption (v2 needs >4 bits) uint8_t window_bits_max : 4; // Max window bits buffer can hold uint8_t configured : 1; // Whether config has been set -#if TAMP_V2_DECOMPRESS - uint8_t rle_last_written : 1; // Previous write was RLE (skip window re-write) -#endif } TampDecompressor; /** diff --git a/tamp/compressor.py b/tamp/compressor.py index 906477cc..b2544abb 100644 --- a/tamp/compressor.py +++ b/tamp/compressor.py @@ -189,7 +189,6 @@ def __init__( self.v2: bool = v2 self._rle_count = 0 - self._rle_last_written = False # The previous write was an RLE token # "+1" Because a RLE of 1 is not valid. self._rle_max_size = (13 << _LEADING_RLE_HUFFMAN_BITS) + (1 << _LEADING_RLE_HUFFMAN_BITS) + 1 @@ -368,7 +367,6 @@ def _compress_input_buffer_single(self) -> int: else: bytes_written += self._write_match(search_i, match) - self._rle_last_written = False for _ in range(match_size): self._input_buffer.popleft() else: @@ -438,7 +436,6 @@ def _write_literal(self, literal) -> int: bytes_written += self._bit_writer.write(literal | self.literal_flag, self.literal_bits + 1) self._window_buffer.write_byte(literal) - self._rle_last_written = False return bytes_written def _write_match(self, search_i, match) -> int: @@ -456,7 +453,6 @@ def _write_match(self, search_i, match) -> int: bytes_written += self._bit_writer.write_huffman_and_literal_flag(match_size - self.min_pattern_size) bytes_written += self._bit_writer.write(search_i, self.window_bits) self._window_buffer.write_bytes(match) - self._rle_last_written = False return bytes_written def _write_rle(self) -> int: @@ -474,12 +470,10 @@ def _write_rle(self) -> int: bytes_written += self._bit_writer.write_huffman_and_literal_flag(_RLE_SYMBOL) bytes_written += 
self._write_extended_huffman(self._rle_count - 2, _LEADING_RLE_HUFFMAN_BITS) - if not self._rle_last_written: - # Only write up to 8 bytes, and only if we didn't already do this. - # This prevents filling up the window buffer with unhelpful data. - self._window_buffer.write_bytes(bytes([last_written_byte]) * min(self._rle_count, _RLE_MAX_WINDOW)) - - self._rle_last_written = True + # Write up to 8 bytes (or until end of buffer) to the window. + remaining = self._window_buffer.size - self._window_buffer.pos + window_write = min(self._rle_count, _RLE_MAX_WINDOW, remaining) + self._window_buffer.write_bytes(bytes([last_written_byte]) * window_write) self._rle_count = 0 return bytes_written @@ -546,8 +540,6 @@ def flush(self, write_token: bool = True) -> int: bytes_written_flush = self._bit_writer.flush(write_token=write_token) bytes_written += bytes_written_flush - if bytes_written_flush: - self._rle_last_written = False return bytes_written def close(self) -> int: diff --git a/tamp/decompressor.py b/tamp/decompressor.py index e095489e..38ff50a4 100644 --- a/tamp/decompressor.py +++ b/tamp/decompressor.py @@ -181,7 +181,6 @@ def __init__(self, f, *, dictionary: Optional[bytearray] = None): uses_custom_dictionary = self._bit_reader.read(1) self.v2 = self._bit_reader.read(1) more_header_bytes = self._bit_reader.read(1) - self._rle_last_written = False if more_header_bytes: raise NotImplementedError @@ -242,7 +241,6 @@ def write_to_output(string): if is_literal: string = bytes([self._bit_reader.read(self.literal_bits)]) self._window_buffer.write_bytes(string) - self._rle_last_written = False else: match_size = self._bit_reader.read_huffman() if match_size is _FLUSH: @@ -256,9 +254,9 @@ def write_to_output(string): rle_count += 1 + 1 symbol = self._window_buffer.last_written_byte string = bytes([symbol]) * rle_count - if not self._rle_last_written: - self._window_buffer.write_bytes(string[: min(rle_count, _RLE_MAX_WINDOW)]) - self._rle_last_written = True + remaining = 
self._window_buffer.size - self._window_buffer.pos + window_write = min(rle_count, _RLE_MAX_WINDOW, remaining) + self._window_buffer.write_bytes(string[:window_write]) elif match_size == _EXTENDED_MATCH_SYMBOL: # Format: size (huffman+trailing), then position match_size = self._bit_reader.read_huffman() @@ -270,7 +268,6 @@ def write_to_output(string): string = self._window_buffer.get(index, match_size) self._window_buffer.write_bytes(string) - self._rle_last_written = False else: raise ValueError("unreachable") else: @@ -279,7 +276,6 @@ def write_to_output(string): string = self._window_buffer.get(index, match_size) self._window_buffer.write_bytes(string) - self._rle_last_written = False if not write_to_output(string): break From aea413db85de19337bfd3944f2ed5db965959f44 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 20:01:39 -0500 Subject: [PATCH 011/109] rename pending_symbol to token_state. --- tamp/_c_src/tamp/decompressor.c | 46 ++++++++++++++++----------------- tamp/_c_src/tamp/decompressor.h | 2 +- 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index f370cd8e..17f1d02c 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -8,13 +8,11 @@ #define FLUSH 15 #if TAMP_V2_DECOMPRESS -/* Pending symbol states for v2 decode suspend/resume (2 bits). - * When skip_bytes > 0, we're resuming after output-full with full decode saved. - */ -#define PENDING_NONE 0 -#define PENDING_RLE 1 -#define PENDING_EXT_NEED_OFFSET 2 -#define PENDING_EXT_FRESH 3 +/* Token state for v2 decode suspend/resume (2 bits). 
*/ +#define TOKEN_NONE 0 +#define TOKEN_RLE 1 +#define TOKEN_EXT_MATCH 2 +#define TOKEN_EXT_MATCH_FRESH 3 #endif /** @@ -43,7 +41,7 @@ static const uint8_t HUFFMAN_TABLE[128] = { * * @returns Decoded match_size */ -static inline int8_t huffman_decode(uint32_t *bit_buffer, uint8_t *bit_buffer_pos) { +static int8_t huffman_decode(uint32_t *bit_buffer, uint8_t *bit_buffer_pos) { uint8_t code; uint8_t bit_len; @@ -132,13 +130,13 @@ static tamp_res decode_rle(TampDecompressor *d, unsigned char **output, const un /* Partial write - save state for resume */ to_write = output_space; d->skip_bytes = skip + output_space; - d->pending_symbol = PENDING_RLE; + d->token_state = TOKEN_RLE; d->pending_window_offset = rle_count; } else { /* Complete write */ to_write = remaining_count; d->skip_bytes = 0; - d->pending_symbol = PENDING_NONE; + d->token_state = TOKEN_NONE; } /* Write repeated bytes to output */ @@ -158,7 +156,7 @@ static tamp_res decode_rle(TampDecompressor *d, unsigned char **output, const un d->window_pos &= window_mask; } - return (d->pending_symbol == PENDING_NONE) ? TAMP_OK : TAMP_OUTPUT_FULL; + return (d->token_state == TOKEN_NONE) ? 
TAMP_OK : TAMP_OUTPUT_FULL; } /** @@ -169,7 +167,7 @@ static tamp_res decode_rle(TampDecompressor *d, unsigned char **output, const un * * State machine: * - Fresh: decode huffman+trailing, then window_offset - * - PENDING_EXT_NEED_OFFSET: have match_size, need window_offset + * - TOKEN_EXT_MATCH: have match_size, need window_offset * - Output-full resume (skip > 0): have both match_size and window_offset */ static tamp_res decode_extended_match(TampDecompressor *d, unsigned char **output, const unsigned char *output_end, @@ -183,7 +181,7 @@ static tamp_res decode_extended_match(TampDecompressor *d, unsigned char **outpu /* Resume from output-full: both values saved */ window_offset = d->pending_window_offset; match_size = d->pending_match_size; - } else if (d->pending_symbol == PENDING_EXT_NEED_OFFSET) { + } else if (d->token_state == TOKEN_EXT_MATCH) { /* Resume: have match_size, need window_offset */ match_size = d->pending_match_size; @@ -201,7 +199,7 @@ static tamp_res decode_extended_match(TampDecompressor *d, unsigned char **outpu /* Now decode window_offset */ if (TAMP_UNLIKELY(d->bit_buffer_pos < conf_window)) { /* Save match_size and return */ - d->pending_symbol = PENDING_EXT_NEED_OFFSET; + d->token_state = TOKEN_EXT_MATCH; d->pending_match_size = match_size; return TAMP_INPUT_EXHAUSTED; } @@ -226,14 +224,14 @@ static tamp_res decode_extended_match(TampDecompressor *d, unsigned char **outpu /* Partial write - save state for resume */ to_write = output_space; d->skip_bytes = skip + output_space; - d->pending_symbol = PENDING_EXT_NEED_OFFSET; /* Reuse for output-full */ + d->token_state = TOKEN_EXT_MATCH; /* Reuse for output-full */ d->pending_window_offset = window_offset; d->pending_match_size = match_size; } else { /* Complete write */ to_write = remaining_count; d->skip_bytes = 0; - d->pending_symbol = PENDING_NONE; + d->token_state = TOKEN_NONE; } /* Copy from window to output */ @@ -244,7 +242,7 @@ static tamp_res 
decode_extended_match(TampDecompressor *d, unsigned char **outpu *output_written_size += to_write; /* Update window only on complete decode */ - if (d->pending_symbol == PENDING_NONE) { + if (d->token_state == TOKEN_NONE) { uint16_t wp = d->window_pos; for (uint16_t i = 0; i < match_size; i++) { d->window[wp] = d->window[(window_offset + i) & window_mask]; @@ -253,7 +251,7 @@ static tamp_res decode_extended_match(TampDecompressor *d, unsigned char **outpu d->window_pos = wp; } - return (d->pending_symbol == PENDING_NONE) ? TAMP_OK : TAMP_OUTPUT_FULL; + return (d->token_state == TOKEN_NONE) ? TAMP_OK : TAMP_OUTPUT_FULL; } #endif /* TAMP_V2_DECOMPRESS */ @@ -403,7 +401,7 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne while (input != input_end || decompressor->bit_buffer_pos #if TAMP_V2_DECOMPRESS - || decompressor->pending_symbol + || decompressor->token_state #endif ) { // Populate the bit buffer @@ -411,10 +409,10 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne #if TAMP_V2_DECOMPRESS /* Resume pending v2 operation. Retry after refill if helper needs more bits. 
*/ - if (TAMP_UNLIKELY(decompressor->pending_symbol)) { + if (TAMP_UNLIKELY(decompressor->token_state)) { if (TAMP_UNLIKELY(output == output_end)) return TAMP_OUTPUT_FULL; tamp_res v2_res; - if (decompressor->pending_symbol == PENDING_RLE) { + if (decompressor->token_state == TOKEN_RLE) { v2_res = decode_rle(decompressor, &output, output_end, output_written_size, window_mask); } else { v2_res = decode_extended_match(decompressor, &output, output_end, output_written_size, conf_window, @@ -486,16 +484,16 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne tamp_res v2_res; if (match_size == TAMP_RLE_SYMBOL) { - decompressor->pending_symbol = PENDING_RLE; + decompressor->token_state = TOKEN_RLE; v2_res = decode_rle(decompressor, &output, output_end, output_written_size, window_mask); } else if (match_size == TAMP_EXTENDED_MATCH_SYMBOL) { - decompressor->pending_symbol = PENDING_EXT_FRESH; + decompressor->token_state = TOKEN_EXT_MATCH_FRESH; v2_res = decode_extended_match(decompressor, &output, output_end, output_written_size, conf_window, min_pattern_size, window_mask); } else { return TAMP_ERROR; /* Invalid v2 symbol */ } - /* On success, helper clears pending_symbol; on error, it stays set for resume. + /* On success, helper clears token_state; on error, it stays set for resume. * TAMP_INPUT_EXHAUSTED is handled by resume path on next iteration. 
*/ if (v2_res == TAMP_OUTPUT_FULL || v2_res < TAMP_OK) return v2_res; continue; diff --git a/tamp/_c_src/tamp/decompressor.h b/tamp/_c_src/tamp/decompressor.h index 8a3ff207..8aae31be 100644 --- a/tamp/_c_src/tamp/decompressor.h +++ b/tamp/_c_src/tamp/decompressor.h @@ -18,7 +18,7 @@ typedef struct { uint16_t window_pos; // Current position in window (15 bits) uint8_t bit_buffer_pos; // Bits currently in bit_buffer (6 bits) #if TAMP_V2_DECOMPRESS - uint8_t pending_symbol : 2; // State machine: 0=none, 1=RLE, 2=ext need offset, 3=ext fresh + uint8_t token_state : 2; // 0=none, 1=RLE, 2=ext match, 3=ext match fresh uint16_t pending_window_offset; // Saved window_offset for extended match output-full resume uint16_t pending_match_size; // Saved match_size for extended match resume #endif From 184627098b5bc1723017fa6a2596963d6670171c Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 20:11:44 -0500 Subject: [PATCH 012/109] Make decompression 1% slower to save 200 bytes in firmware --- tamp/_c_src/tamp/decompressor.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index 17f1d02c..0d98c221 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -102,7 +102,8 @@ static tamp_res decode_huffman_trailing(TampDecompressor *d, uint8_t trailing_bi * rle_count = (count_high << 4) + count_low + 2 */ static tamp_res decode_rle(TampDecompressor *d, unsigned char **output, const unsigned char *output_end, - size_t *output_written_size, uint16_t window_mask) { + size_t *output_written_size) { + const uint16_t window_mask = (1u << d->conf_window) - 1; uint16_t rle_count; uint16_t skip = d->skip_bytes; @@ -171,8 +172,9 @@ static tamp_res decode_rle(TampDecompressor *d, unsigned char **output, const un * - Output-full resume (skip > 0): have both match_size and window_offset */ static tamp_res decode_extended_match(TampDecompressor *d, 
unsigned char **output, const unsigned char *output_end, - size_t *output_written_size, uint8_t conf_window, uint8_t min_pattern_size, - uint16_t window_mask) { + size_t *output_written_size) { + const uint8_t conf_window = d->conf_window; + const uint16_t window_mask = (1u << conf_window) - 1; uint16_t window_offset; uint16_t match_size; uint16_t skip = d->skip_bytes; @@ -194,7 +196,7 @@ static tamp_res decode_extended_match(TampDecompressor *d, unsigned char **outpu uint16_t raw; tamp_res res = decode_huffman_trailing(d, TAMP_LEADING_EXTENDED_MATCH_BITS, &raw); if (res != TAMP_OK) return res; - match_size = raw + min_pattern_size + 12; + match_size = raw + d->min_pattern_size + 12; /* Now decode window_offset */ if (TAMP_UNLIKELY(d->bit_buffer_pos < conf_window)) { @@ -413,10 +415,9 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne if (TAMP_UNLIKELY(output == output_end)) return TAMP_OUTPUT_FULL; tamp_res v2_res; if (decompressor->token_state == TOKEN_RLE) { - v2_res = decode_rle(decompressor, &output, output_end, output_written_size, window_mask); + v2_res = decode_rle(decompressor, &output, output_end, output_written_size); } else { - v2_res = decode_extended_match(decompressor, &output, output_end, output_written_size, conf_window, - min_pattern_size, window_mask); + v2_res = decode_extended_match(decompressor, &output, output_end, output_written_size); } if (v2_res == TAMP_INPUT_EXHAUSTED) { refill_bit_buffer(decompressor, &input, input_end, input_consumed_size); @@ -485,11 +486,10 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne tamp_res v2_res; if (match_size == TAMP_RLE_SYMBOL) { decompressor->token_state = TOKEN_RLE; - v2_res = decode_rle(decompressor, &output, output_end, output_written_size, window_mask); + v2_res = decode_rle(decompressor, &output, output_end, output_written_size); } else if (match_size == TAMP_EXTENDED_MATCH_SYMBOL) { decompressor->token_state = 
TOKEN_EXT_MATCH_FRESH; - v2_res = decode_extended_match(decompressor, &output, output_end, output_written_size, conf_window, - min_pattern_size, window_mask); + v2_res = decode_extended_match(decompressor, &output, output_end, output_written_size); } else { return TAMP_ERROR; /* Invalid v2 symbol */ } From ce3bd9442cb894cac1e9301cab8374244dd3da09 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 20:14:52 -0500 Subject: [PATCH 013/109] narrow variable scope --- tamp/_c_src/tamp/decompressor.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index 0d98c221..9e5bcca0 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -103,7 +103,6 @@ static tamp_res decode_huffman_trailing(TampDecompressor *d, uint8_t trailing_bi */ static tamp_res decode_rle(TampDecompressor *d, unsigned char **output, const unsigned char *output_end, size_t *output_written_size) { - const uint16_t window_mask = (1u << d->conf_window) - 1; uint16_t rle_count; uint16_t skip = d->skip_bytes; @@ -119,7 +118,7 @@ static tamp_res decode_rle(TampDecompressor *d, unsigned char **output, const un } /* Get the byte to repeat (last written byte) */ - uint16_t prev_pos = (d->window_pos == 0) ? window_mask : (d->window_pos - 1); + uint16_t prev_pos = (d->window_pos - 1) & ((1u << d->conf_window) - 1); uint8_t symbol = d->window[prev_pos]; /* Calculate how many to write this call */ @@ -149,12 +148,13 @@ static tamp_res decode_rle(TampDecompressor *d, unsigned char **output, const un /* Update window only on first chunk (skip==0). * Write up to TAMP_RLE_MAX_WINDOW or until end of buffer (no wrap). 
*/ if (skip == 0) { - uint16_t remaining = (window_mask + 1) - d->window_pos; + const uint16_t window_size = 1u << d->conf_window; + uint16_t remaining = window_size - d->window_pos; uint16_t window_write = MIN(MIN(rle_count, TAMP_RLE_MAX_WINDOW), remaining); for (uint16_t i = 0; i < window_write; i++) { d->window[d->window_pos++] = symbol; } - d->window_pos &= window_mask; + d->window_pos &= (window_size - 1); } return (d->token_state == TOKEN_NONE) ? TAMP_OK : TAMP_OUTPUT_FULL; @@ -174,7 +174,6 @@ static tamp_res decode_rle(TampDecompressor *d, unsigned char **output, const un static tamp_res decode_extended_match(TampDecompressor *d, unsigned char **output, const unsigned char *output_end, size_t *output_written_size) { const uint8_t conf_window = d->conf_window; - const uint16_t window_mask = (1u << conf_window) - 1; uint16_t window_offset; uint16_t match_size; uint16_t skip = d->skip_bytes; @@ -245,6 +244,7 @@ static tamp_res decode_extended_match(TampDecompressor *d, unsigned char **outpu /* Update window only on complete decode */ if (d->token_state == TOKEN_NONE) { + const uint16_t window_mask = (1u << conf_window) - 1; uint16_t wp = d->window_pos; for (uint16_t i = 0; i < match_size; i++) { d->window[wp] = d->window[(window_offset + i) & window_mask]; From 34ad99cd476a1e05b7b8362f8a2d3a61b9352a40 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 21:45:04 -0500 Subject: [PATCH 014/109] remove extended-match wrapping logic. 
--- tamp/_c_src/tamp/decompressor.c | 13 +++++++------ tamp/compressor.py | 10 +++++++--- tamp/decompressor.py | 5 ++++- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index 9e5bcca0..39d14e1a 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -242,15 +242,16 @@ static tamp_res decode_extended_match(TampDecompressor *d, unsigned char **outpu } *output_written_size += to_write; - /* Update window only on complete decode */ + /* Update window only on complete decode. + * Write up to end of buffer (no wrap), mask wp only at the end. */ if (d->token_state == TOKEN_NONE) { - const uint16_t window_mask = (1u << conf_window) - 1; + uint16_t remaining = window_size - d->window_pos; + uint16_t window_write = (match_size < remaining) ? match_size : remaining; uint16_t wp = d->window_pos; - for (uint16_t i = 0; i < match_size; i++) { - d->window[wp] = d->window[(window_offset + i) & window_mask]; - wp = (wp + 1) & window_mask; + for (uint16_t i = 0; i < window_write; i++) { + d->window[wp++] = d->window[window_offset + i]; } - d->window_pos = wp; + d->window_pos = wp & (window_size - 1); } return (d->token_state == TOKEN_NONE) ? 
TAMP_OK : TAMP_OUTPUT_FULL; diff --git a/tamp/compressor.py b/tamp/compressor.py index b2544abb..5194515d 100644 --- a/tamp/compressor.py +++ b/tamp/compressor.py @@ -125,9 +125,13 @@ def index(self, pattern, start): return result def write_from_self(self, position, size): - data = [self.buffer[(position + i) % self.size] for i in range(size)] - for x in data: - self.write_byte(x) + # Write up to end of buffer (no wrap) + remaining = self.size - self.pos + window_write = min(size, remaining) + for i in range(window_write): + self.buffer[self.pos] = self.buffer[position + i] + self.pos += 1 + self.pos %= self.size def get(self, index, size): out = bytearray(size) diff --git a/tamp/decompressor.py b/tamp/decompressor.py index 38ff50a4..a61ad5d5 100644 --- a/tamp/decompressor.py +++ b/tamp/decompressor.py @@ -267,7 +267,10 @@ def write_to_output(string): string = self._window_buffer.get(index, match_size) - self._window_buffer.write_bytes(string) + # Write up to end of buffer (no wrap) + remaining = self._window_buffer.size - self._window_buffer.pos + window_write = min(match_size, remaining) + self._window_buffer.write_bytes(string[:window_write]) else: raise ValueError("unreachable") else: From 4cdb50fb2dca56b0ae9d66b61735e5a2b3882057 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 22:03:45 -0500 Subject: [PATCH 015/109] move TAMP_OUTPUT_FULL logic to top of loop --- tamp/_c_src/tamp/decompressor.c | 5 ++--- tamp/_c_src/tamp/decompressor.h | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index 39d14e1a..af87e2d8 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -407,13 +407,14 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne || decompressor->token_state #endif ) { + if (TAMP_UNLIKELY(output == output_end)) return TAMP_OUTPUT_FULL; + // Populate the bit buffer refill_bit_buffer(decompressor, 
&input, input_end, input_consumed_size); #if TAMP_V2_DECOMPRESS /* Resume pending v2 operation. Retry after refill if helper needs more bits. */ if (TAMP_UNLIKELY(decompressor->token_state)) { - if (TAMP_UNLIKELY(output == output_end)) return TAMP_OUTPUT_FULL; tamp_res v2_res; if (decompressor->token_state == TOKEN_RLE) { v2_res = decode_rle(decompressor, &output, output_end, output_written_size); @@ -432,8 +433,6 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne if (TAMP_UNLIKELY(decompressor->bit_buffer_pos == 0)) return TAMP_INPUT_EXHAUSTED; - if (TAMP_UNLIKELY(output == output_end)) return TAMP_OUTPUT_FULL; - // Hint that patterns are more likely than literals if (TAMP_UNLIKELY(decompressor->bit_buffer >> 31)) { // is literal diff --git a/tamp/_c_src/tamp/decompressor.h b/tamp/_c_src/tamp/decompressor.h index 8aae31be..d12c98a9 100644 --- a/tamp/_c_src/tamp/decompressor.h +++ b/tamp/_c_src/tamp/decompressor.h @@ -11,12 +11,11 @@ extern "C" { * Fields are ordered by access frequency for cache efficiency. */ typedef struct { - /* HOT: accessed every iteration of the decompression loop. - * Full-width types avoid bitfield access overhead. */ + /* HOT: accessed every iteration of the decompression loop. */ unsigned char *window; // Pointer to window buffer uint32_t bit_buffer; // Bit buffer for reading compressed data (32 bits) uint16_t window_pos; // Current position in window (15 bits) - uint8_t bit_buffer_pos; // Bits currently in bit_buffer (6 bits) + uint8_t bit_buffer_pos; // Bits currently in bit_buffer (6 bits needed) #if TAMP_V2_DECOMPRESS uint8_t token_state : 2; // 0=none, 1=RLE, 2=ext match, 3=ext match fresh uint16_t pending_window_offset; // Saved window_offset for extended match output-full resume From 70fd7398d90ef73cb5cb9c668e2fc6ffb0ae4286 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 22:11:58 -0500 Subject: [PATCH 016/109] further reduce binary size by 56 bytes via a goto. 
--- tamp/_c_src/tamp/decompressor.c | 60 +++++++++++++++------------------ 1 file changed, 27 insertions(+), 33 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index af87e2d8..d3565128 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -41,7 +41,7 @@ static const uint8_t HUFFMAN_TABLE[128] = { * * @returns Decoded match_size */ -static int8_t huffman_decode(uint32_t *bit_buffer, uint8_t *bit_buffer_pos) { +static int8_t huffman_decode(uint32_t* bit_buffer, uint8_t* bit_buffer_pos) { uint8_t code; uint8_t bit_len; @@ -71,7 +71,7 @@ static int8_t huffman_decode(uint32_t *bit_buffer, uint8_t *bit_buffer_pos) { * @param result Output: (huffman << trailing_bits) + trailing * @return TAMP_OK on success, TAMP_INPUT_EXHAUSTED if more bits needed */ -static tamp_res decode_huffman_trailing(TampDecompressor *d, uint8_t trailing_bits, uint16_t *result) { +static tamp_res decode_huffman_trailing(TampDecompressor* d, uint8_t trailing_bits, uint16_t* result) { uint32_t bit_buffer = d->bit_buffer; uint8_t bit_buffer_pos = d->bit_buffer_pos; @@ -101,8 +101,8 @@ static tamp_res decode_huffman_trailing(TampDecompressor *d, uint8_t trailing_bi * RLE format: huffman(count_high) + trailing_bits(count_low) * rle_count = (count_high << 4) + count_low + 2 */ -static tamp_res decode_rle(TampDecompressor *d, unsigned char **output, const unsigned char *output_end, - size_t *output_written_size) { +static tamp_res decode_rle(TampDecompressor* d, unsigned char** output, const unsigned char* output_end, + size_t* output_written_size) { uint16_t rle_count; uint16_t skip = d->skip_bytes; @@ -171,8 +171,8 @@ static tamp_res decode_rle(TampDecompressor *d, unsigned char **output, const un * - TOKEN_EXT_MATCH: have match_size, need window_offset * - Output-full resume (skip > 0): have both match_size and window_offset */ -static tamp_res decode_extended_match(TampDecompressor *d, unsigned char **output, const unsigned 
char *output_end, - size_t *output_written_size) { +static tamp_res decode_extended_match(TampDecompressor* d, unsigned char** output, const unsigned char* output_end, + size_t* output_written_size) { const uint8_t conf_window = d->conf_window; uint16_t window_offset; uint16_t match_size; @@ -265,7 +265,7 @@ static tamp_res decode_extended_match(TampDecompressor *d, unsigned char **outpu * using a temporary buffer when necessary. Overlap occurs when the * destination would "catch up" to the source during copying. */ -static inline void window_copy(unsigned char *window, uint16_t *window_pos, uint16_t window_offset, uint8_t match_size, +static inline void window_copy(unsigned char* window, uint16_t* window_pos, uint16_t window_offset, uint8_t match_size, uint16_t window_mask) { const uint16_t src_to_dst = (*window_pos - window_offset) & window_mask; const bool overlap = (src_to_dst < match_size) && (src_to_dst > 0); @@ -287,8 +287,8 @@ static inline void window_copy(unsigned char *window, uint16_t *window_pos, uint } } -tamp_res tamp_decompressor_read_header(TampConf *conf, const unsigned char *input, size_t input_size, - size_t *input_consumed_size) { +tamp_res tamp_decompressor_read_header(TampConf* conf, const unsigned char* input, size_t input_size, + size_t* input_consumed_size) { if (input_consumed_size) (*input_consumed_size) = 0; if (input_size == 0) return TAMP_INPUT_EXHAUSTED; if (input[0] & 0x1) return TAMP_INVALID_CONF; // Currently only a single header byte is supported. 
@@ -307,7 +307,7 @@ tamp_res tamp_decompressor_read_header(TampConf *conf, const unsigned char *inpu * * window * * window_bits_max */ -static tamp_res tamp_decompressor_populate_from_conf(TampDecompressor *decompressor, uint8_t conf_window, +static tamp_res tamp_decompressor_populate_from_conf(TampDecompressor* decompressor, uint8_t conf_window, uint8_t conf_literal, uint8_t conf_use_custom_dictionary, uint8_t conf_v2) { if (conf_window < 8 || conf_window > 15) return TAMP_INVALID_CONF; @@ -328,7 +328,7 @@ static tamp_res tamp_decompressor_populate_from_conf(TampDecompressor *decompres return TAMP_OK; } -tamp_res tamp_decompressor_init(TampDecompressor *decompressor, const TampConf *conf, unsigned char *window, +tamp_res tamp_decompressor_init(TampDecompressor* decompressor, const TampConf* conf, unsigned char* window, uint8_t window_bits) { tamp_res res = TAMP_OK; @@ -336,7 +336,7 @@ tamp_res tamp_decompressor_init(TampDecompressor *decompressor, const TampConf * if (window_bits < 8 || window_bits > 15) return TAMP_INVALID_CONF; for (uint8_t i = 0; i < sizeof(TampDecompressor); i++) // Zero-out the struct - ((unsigned char *)decompressor)[i] = 0; + ((unsigned char*)decompressor)[i] = 0; decompressor->window = window; decompressor->window_bits_max = window_bits; if (conf) { @@ -352,8 +352,8 @@ tamp_res tamp_decompressor_init(TampDecompressor *decompressor, const TampConf * * * Consumes bytes from input until bit_buffer has at least 25 bits or input is exhausted. 
*/ -static inline void refill_bit_buffer(TampDecompressor *d, const unsigned char **input, const unsigned char *input_end, - size_t *input_consumed_size) { +static inline void refill_bit_buffer(TampDecompressor* d, const unsigned char** input, const unsigned char* input_end, + size_t* input_consumed_size) { while (*input != input_end && d->bit_buffer_pos <= 24) { d->bit_buffer_pos += 8; d->bit_buffer |= (uint32_t) * (*input) << (32 - d->bit_buffer_pos); @@ -362,14 +362,14 @@ static inline void refill_bit_buffer(TampDecompressor *d, const unsigned char ** } } -tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigned char *output, size_t output_size, - size_t *output_written_size, const unsigned char *input, size_t input_size, - size_t *input_consumed_size, tamp_callback_t callback, void *user_data) { +tamp_res tamp_decompressor_decompress_cb(TampDecompressor* decompressor, unsigned char* output, size_t output_size, + size_t* output_written_size, const unsigned char* input, size_t input_size, + size_t* input_consumed_size, tamp_callback_t callback, void* user_data) { size_t input_consumed_size_proxy; size_t output_written_size_proxy; tamp_res res; - const unsigned char *input_end = input + input_size; - const unsigned char *output_end = output + output_size; + const unsigned char* input_end = input + input_size; + const unsigned char* output_end = output + output_size; if (!output_written_size) output_written_size = &output_written_size_proxy; if (!input_consumed_size) input_consumed_size = &input_consumed_size_proxy; @@ -413,8 +413,9 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne refill_bit_buffer(decompressor, &input, input_end, input_consumed_size); #if TAMP_V2_DECOMPRESS - /* Resume pending v2 operation. Retry after refill if helper needs more bits. */ + /* Handle v2 tokens - either resuming or fresh from match_size detection below. 
*/ if (TAMP_UNLIKELY(decompressor->token_state)) { + v2_dispatch:; tamp_res v2_res; if (decompressor->token_state == TOKEN_RLE) { v2_res = decode_rle(decompressor, &output, output_end, output_written_size); @@ -424,7 +425,7 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne if (v2_res == TAMP_INPUT_EXHAUSTED) { refill_bit_buffer(decompressor, &input, input_end, input_consumed_size); if (input == input_end) return TAMP_INPUT_EXHAUSTED; - continue; /* Retry with refilled buffer */ + continue; } if (v2_res != TAMP_OK) return v2_res; continue; @@ -479,24 +480,17 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne #if TAMP_V2_DECOMPRESS /* Check for v2 symbols */ if (TAMP_UNLIKELY(v2_enabled && match_size >= TAMP_RLE_SYMBOL)) { - /* Commit bit buffer before calling helper. */ decompressor->bit_buffer = bit_buffer; decompressor->bit_buffer_pos = bit_buffer_pos; - tamp_res v2_res; if (match_size == TAMP_RLE_SYMBOL) { decompressor->token_state = TOKEN_RLE; - v2_res = decode_rle(decompressor, &output, output_end, output_written_size); } else if (match_size == TAMP_EXTENDED_MATCH_SYMBOL) { decompressor->token_state = TOKEN_EXT_MATCH_FRESH; - v2_res = decode_extended_match(decompressor, &output, output_end, output_written_size); } else { - return TAMP_ERROR; /* Invalid v2 symbol */ + return TAMP_ERROR; } - /* On success, helper clears token_state; on error, it stays set for resume. - * TAMP_INPUT_EXHAUSTED is handled by resume path on next iteration. 
*/ - if (v2_res == TAMP_OUTPUT_FULL || v2_res < TAMP_OK) return v2_res; - continue; + goto v2_dispatch; } #endif @@ -555,9 +549,9 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor *decompressor, unsigne #if TAMP_STREAM -tamp_res tamp_decompress_stream(TampDecompressor *decompressor, tamp_read_t read_cb, void *read_handle, - tamp_write_t write_cb, void *write_handle, size_t *input_consumed_size, - size_t *output_written_size, tamp_callback_t callback, void *user_data) { +tamp_res tamp_decompress_stream(TampDecompressor* decompressor, tamp_read_t read_cb, void* read_handle, + tamp_write_t write_cb, void* write_handle, size_t* input_consumed_size, + size_t* output_written_size, tamp_callback_t callback, void* user_data) { size_t input_consumed_size_proxy, output_written_size_proxy; if (!input_consumed_size) input_consumed_size = &input_consumed_size_proxy; if (!output_written_size) output_written_size = &output_written_size_proxy; From 6b59265ee7edbab297c4663c8c0bc067fbfe06c0 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 22:17:03 -0500 Subject: [PATCH 017/109] use some math instead of if/else --- tamp/_c_src/tamp/decompressor.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index d3565128..c2688b4e 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -8,11 +8,15 @@ #define FLUSH 15 #if TAMP_V2_DECOMPRESS -/* Token state for v2 decode suspend/resume (2 bits). */ +/* Token state for v2 decode suspend/resume (2 bits). + * TOKEN_RLE and TOKEN_EXT_MATCH_FRESH are arranged so that: + * token_state = match_size - (TAMP_RLE_SYMBOL - 1) + * maps TAMP_RLE_SYMBOL (12) -> 1 and TAMP_EXTENDED_MATCH_SYMBOL (13) -> 2. 
+ */ #define TOKEN_NONE 0 #define TOKEN_RLE 1 -#define TOKEN_EXT_MATCH 2 -#define TOKEN_EXT_MATCH_FRESH 3 +#define TOKEN_EXT_MATCH_FRESH 2 +#define TOKEN_EXT_MATCH 3 /* Resume: have match_size, need window_offset */ #endif /** @@ -478,18 +482,12 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor* decompressor, unsigne } #if TAMP_V2_DECOMPRESS - /* Check for v2 symbols */ + /* Check for v2 symbols (RLE=12, extended match=13). + * Convert match_size to token_state via subtraction (see TOKEN_* defines). */ if (TAMP_UNLIKELY(v2_enabled && match_size >= TAMP_RLE_SYMBOL)) { decompressor->bit_buffer = bit_buffer; decompressor->bit_buffer_pos = bit_buffer_pos; - - if (match_size == TAMP_RLE_SYMBOL) { - decompressor->token_state = TOKEN_RLE; - } else if (match_size == TAMP_EXTENDED_MATCH_SYMBOL) { - decompressor->token_state = TOKEN_EXT_MATCH_FRESH; - } else { - return TAMP_ERROR; - } + decompressor->token_state = match_size - (TAMP_RLE_SYMBOL - 1); goto v2_dispatch; } #endif From 59e40909ab136e958a5068c6ae1badb12b1e0935 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 22:18:00 -0500 Subject: [PATCH 018/109] no need for v2_res --- tamp/_c_src/tamp/decompressor.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index c2688b4e..0772943a 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -419,19 +419,18 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor* decompressor, unsigne #if TAMP_V2_DECOMPRESS /* Handle v2 tokens - either resuming or fresh from match_size detection below. 
*/ if (TAMP_UNLIKELY(decompressor->token_state)) { - v2_dispatch:; - tamp_res v2_res; + v2_dispatch: if (decompressor->token_state == TOKEN_RLE) { - v2_res = decode_rle(decompressor, &output, output_end, output_written_size); + res = decode_rle(decompressor, &output, output_end, output_written_size); } else { - v2_res = decode_extended_match(decompressor, &output, output_end, output_written_size); + res = decode_extended_match(decompressor, &output, output_end, output_written_size); } - if (v2_res == TAMP_INPUT_EXHAUSTED) { + if (res == TAMP_INPUT_EXHAUSTED) { refill_bit_buffer(decompressor, &input, input_end, input_consumed_size); if (input == input_end) return TAMP_INPUT_EXHAUSTED; continue; } - if (v2_res != TAMP_OK) return v2_res; + if (res != TAMP_OK) return res; continue; } #endif From f59fd08c1110d4c10becfec1d312dd95594aa43b Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 22:25:11 -0500 Subject: [PATCH 019/109] simplify while-loop check with a union. reduces binary by 56 bytes --- tamp/_c_src/tamp/decompressor.c | 6 +----- tamp/_c_src/tamp/decompressor.h | 24 +++++++++++++++++++----- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index 0772943a..d156739e 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -406,11 +406,7 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor* decompressor, unsigne const bool v2_enabled = decompressor->conf_v2; #endif - while (input != input_end || decompressor->bit_buffer_pos -#if TAMP_V2_DECOMPRESS - || decompressor->token_state -#endif - ) { + while (input != input_end || decompressor->pos_and_state) { if (TAMP_UNLIKELY(output == output_end)) return TAMP_OUTPUT_FULL; // Populate the bit buffer diff --git a/tamp/_c_src/tamp/decompressor.h b/tamp/_c_src/tamp/decompressor.h index d12c98a9..8e333ba8 100644 --- a/tamp/_c_src/tamp/decompressor.h +++ b/tamp/_c_src/tamp/decompressor.h @@ -12,12 
+12,26 @@ extern "C" { */ typedef struct { /* HOT: accessed every iteration of the decompression loop. */ - unsigned char *window; // Pointer to window buffer - uint32_t bit_buffer; // Bit buffer for reading compressed data (32 bits) - uint16_t window_pos; // Current position in window (15 bits) - uint8_t bit_buffer_pos; // Bits currently in bit_buffer (6 bits needed) + unsigned char *window; // Pointer to window buffer + uint32_t bit_buffer; // Bit buffer for reading compressed data (32 bits) + uint16_t window_pos; // Current position in window (15 bits) + + /* Union allows single zero-check in main loop instead of two separate checks. */ +#if TAMP_V2_DECOMPRESS + union { + struct { + uint8_t bit_buffer_pos; // Bits currently in bit_buffer (6 bits needed) + uint8_t token_state; // 0=none, 1=RLE, 2=ext match fresh, 3=ext match (2 bits used) + }; + uint16_t pos_and_state; // Combined for fast 16-bit zero-check + }; +#else + union { + uint8_t bit_buffer_pos; // Bits currently in bit_buffer (6 bits needed) + uint8_t pos_and_state; // Alias for consistent access in main loop + }; +#endif #if TAMP_V2_DECOMPRESS - uint8_t token_state : 2; // 0=none, 1=RLE, 2=ext match, 3=ext match fresh uint16_t pending_window_offset; // Saved window_offset for extended match output-full resume uint16_t pending_match_size; // Saved match_size for extended match resume #endif From a925f62b82b10df977cf9575d8a1538391f218c4 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 22:42:37 -0500 Subject: [PATCH 020/109] unified huffman decode.
--- tamp/_c_src/tamp/decompressor.c | 113 +++++++++++++++----------------- 1 file changed, 54 insertions(+), 59 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index d156739e..f50c0c33 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -37,68 +37,52 @@ static const uint8_t HUFFMAN_TABLE[128] = { 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17}; /** - * @brief Decode a huffman match-size symbol from the decompressor's bit_buffer. + * @brief Decode huffman symbol + optional trailing bits from bit buffer. * - * Internally updates bit_buffer and bit_buffer_pos. + * Modifies bit_buffer and bit_buffer_pos in place. Caller is responsible + * for committing to decompressor state if needed. * - * bit_buffer MUST have at least 8 bits prior to calling. - * - * @returns Decoded match_size - */ -static int8_t huffman_decode(uint32_t* bit_buffer, uint8_t* bit_buffer_pos) { - uint8_t code; - uint8_t bit_len; - - (*bit_buffer_pos)--; - code = *bit_buffer >> 31; - *bit_buffer <<= 1; - if (TAMP_LIKELY(code == 0)) return 0; - - code = *bit_buffer >> (32 - 7); - code = HUFFMAN_TABLE[code]; - bit_len = code >> 4; - *bit_buffer <<= bit_len; - (*bit_buffer_pos) -= bit_len; - - return code & 0xF; -} - -#if TAMP_V2_DECOMPRESS -/** - * @brief Decode huffman symbol + trailing bits from bit buffer. - * - * Simple helper that decodes from local copies. On failure, decompressor - * state is not modified. Caller is responsible for state management. 
- * - * @param d Decompressor state - * @param trailing_bits Number of trailing bits to read (3 or 4) + * @param bit_buffer Pointer to bit buffer (modified in place) + * @param bit_buffer_pos Pointer to bit position (modified in place) + * @param trailing_bits Number of trailing bits to read (0, 3, or 4) * @param result Output: (huffman << trailing_bits) + trailing * @return TAMP_OK on success, TAMP_INPUT_EXHAUSTED if more bits needed */ -static tamp_res decode_huffman_trailing(TampDecompressor* d, uint8_t trailing_bits, uint16_t* result) { - uint32_t bit_buffer = d->bit_buffer; - uint8_t bit_buffer_pos = d->bit_buffer_pos; - +static tamp_res decode_huffman(uint32_t* bit_buffer, uint8_t* bit_buffer_pos, uint8_t trailing_bits, uint16_t* result) { /* Need at least 1 bit for huffman, plus trailing bits */ - if (TAMP_UNLIKELY(bit_buffer_pos < 1 + trailing_bits)) return TAMP_INPUT_EXHAUSTED; - - int8_t huffman_value = huffman_decode(&bit_buffer, &bit_buffer_pos); + if (TAMP_UNLIKELY(*bit_buffer_pos < 1 + trailing_bits)) return TAMP_INPUT_EXHAUSTED; - if (TAMP_UNLIKELY(bit_buffer_pos < trailing_bits)) return TAMP_INPUT_EXHAUSTED; - - uint8_t trailing = bit_buffer >> (32 - trailing_bits); - bit_buffer <<= trailing_bits; - bit_buffer_pos -= trailing_bits; - - *result = (huffman_value << trailing_bits) + trailing; + /* Decode huffman symbol */ + int8_t huffman_value; + (*bit_buffer_pos)--; + if (TAMP_LIKELY((*bit_buffer >> 31) == 0)) { + *bit_buffer <<= 1; + huffman_value = 0; + } else { + *bit_buffer <<= 1; + uint8_t code = HUFFMAN_TABLE[*bit_buffer >> (32 - 7)]; + uint8_t bit_len = code >> 4; + if (TAMP_UNLIKELY(*bit_buffer_pos < bit_len + trailing_bits)) return TAMP_INPUT_EXHAUSTED; + *bit_buffer <<= bit_len; + *bit_buffer_pos -= bit_len; + huffman_value = code & 0xF; + } - /* Commit only on success */ - d->bit_buffer = bit_buffer; - d->bit_buffer_pos = bit_buffer_pos; + /* Read trailing bits (skip if trailing_bits==0 to avoid undefined shift) */ + if (trailing_bits) { + 
uint8_t trailing = *bit_buffer >> (32 - trailing_bits); + *bit_buffer <<= trailing_bits; + *bit_buffer_pos -= trailing_bits; + *result = (huffman_value << trailing_bits) + trailing; + } else { + *result = huffman_value; + } return TAMP_OK; } +#if TAMP_V2_DECOMPRESS + /** * @brief Decode RLE token and write repeated bytes to output. * @@ -115,9 +99,13 @@ static tamp_res decode_rle(TampDecompressor* d, unsigned char** output, const un rle_count = d->pending_window_offset; } else { /* Fresh decode */ + uint32_t bit_buffer = d->bit_buffer; + uint8_t bit_buffer_pos = d->bit_buffer_pos; uint16_t raw; - tamp_res res = decode_huffman_trailing(d, TAMP_LEADING_RLE_BITS, &raw); + tamp_res res = decode_huffman(&bit_buffer, &bit_buffer_pos, TAMP_LEADING_RLE_BITS, &raw); if (res != TAMP_OK) return res; + d->bit_buffer = bit_buffer; + d->bit_buffer_pos = bit_buffer_pos; rle_count = raw + 2; } @@ -196,21 +184,27 @@ static tamp_res decode_extended_match(TampDecompressor* d, unsigned char** outpu d->bit_buffer_pos -= conf_window; } else { /* Fresh decode: huffman+trailing first, then window_offset */ + uint32_t bit_buffer = d->bit_buffer; + uint8_t bit_buffer_pos = d->bit_buffer_pos; uint16_t raw; - tamp_res res = decode_huffman_trailing(d, TAMP_LEADING_EXTENDED_MATCH_BITS, &raw); + tamp_res res = decode_huffman(&bit_buffer, &bit_buffer_pos, TAMP_LEADING_EXTENDED_MATCH_BITS, &raw); if (res != TAMP_OK) return res; match_size = raw + d->min_pattern_size + 12; /* Now decode window_offset */ - if (TAMP_UNLIKELY(d->bit_buffer_pos < conf_window)) { + if (TAMP_UNLIKELY(bit_buffer_pos < conf_window)) { /* Save match_size and return */ + d->bit_buffer = bit_buffer; + d->bit_buffer_pos = bit_buffer_pos; d->token_state = TOKEN_EXT_MATCH; d->pending_match_size = match_size; return TAMP_INPUT_EXHAUSTED; } - window_offset = d->bit_buffer >> (32 - conf_window); - d->bit_buffer <<= conf_window; - d->bit_buffer_pos -= conf_window; + window_offset = bit_buffer >> (32 - conf_window); + bit_buffer <<= 
conf_window; + bit_buffer_pos -= conf_window; + d->bit_buffer = bit_buffer; + d->bit_buffer_pos = bit_buffer_pos; } /* Security check: validate window bounds */ @@ -464,10 +458,11 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor* decompressor, unsigne bit_buffer <<= 1; bit_buffer_pos--; - // There must be at least 8 bits, otherwise no possible decoding. - if (TAMP_UNLIKELY(bit_buffer_pos < 8)) return TAMP_INPUT_EXHAUSTED; + uint16_t match_size_u16; + if (decode_huffman(&bit_buffer, &bit_buffer_pos, 0, &match_size_u16) != TAMP_OK) + return TAMP_INPUT_EXHAUSTED; + match_size = match_size_u16; - match_size = huffman_decode(&bit_buffer, &bit_buffer_pos); if (TAMP_UNLIKELY(match_size == FLUSH)) { // flush bit_buffer to the nearest byte and skip the remainder of decoding decompressor->bit_buffer = bit_buffer << (bit_buffer_pos & 7); From e69b826db026d667668e29944259cfb5a878b597 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 22:51:16 -0500 Subject: [PATCH 021/109] Add comment about HUFFMAN_TABLE being pretty optimized. --- tamp/_c_src/tamp/decompressor.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index f50c0c33..6defe074 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -20,14 +20,11 @@ #endif /** - * This array was generated with tools/huffman_jump_table.py + * Huffman lookup table indexed by 7 bits (after first "1" bit consumed). + * Upper 4 bits = additional bits to consume, lower 4 bits = symbol (15 = FLUSH). * - * The idea is that the resulting code is smaller/faster as a lookup table than a bunch of if/else - * statements. - * - * Of each element: - * * The upper 4 bits express the number of bits to decode. 
- * * The lower 4 bits express the decoded value, with FLUSH being represented as 0b1111 + * Note: A 64-byte table with special-cased symbol 1 was tried but was ~10% slower + * and only saved 8 bytes in final firmware due to added branch logic. */ static const uint8_t HUFFMAN_TABLE[128] = { 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 85, 85, 85, 85, 122, 123, 104, 104, 86, 86, @@ -56,9 +53,11 @@ static tamp_res decode_huffman(uint32_t* bit_buffer, uint8_t* bit_buffer_pos, ui int8_t huffman_value; (*bit_buffer_pos)--; if (TAMP_LIKELY((*bit_buffer >> 31) == 0)) { + /* Symbol 0: code "0" */ *bit_buffer <<= 1; huffman_value = 0; } else { + /* All other symbols: use 128-entry table indexed by next 7 bits */ *bit_buffer <<= 1; uint8_t code = HUFFMAN_TABLE[*bit_buffer >> (32 - 7)]; uint8_t bit_len = code >> 4; From 589a9a2cb854933a80f66879043be9819576202a Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 23:01:51 -0500 Subject: [PATCH 022/109] Make some datatypes smaller; reduces binary by 36 bytes. 
--- tamp/_c_src/tamp/decompressor.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index 6defe074..2c43c08e 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -42,10 +42,10 @@ static const uint8_t HUFFMAN_TABLE[128] = { * @param bit_buffer Pointer to bit buffer (modified in place) * @param bit_buffer_pos Pointer to bit position (modified in place) * @param trailing_bits Number of trailing bits to read (0, 3, or 4) - * @param result Output: (huffman << trailing_bits) + trailing + * @param result Output: (huffman << trailing_bits) + trailing (max 223 for trailing_bits=4) * @return TAMP_OK on success, TAMP_INPUT_EXHAUSTED if more bits needed */ -static tamp_res decode_huffman(uint32_t* bit_buffer, uint8_t* bit_buffer_pos, uint8_t trailing_bits, uint16_t* result) { +static tamp_res decode_huffman(uint32_t* bit_buffer, uint8_t* bit_buffer_pos, uint8_t trailing_bits, uint8_t* result) { /* Need at least 1 bit for huffman, plus trailing bits */ if (TAMP_UNLIKELY(*bit_buffer_pos < 1 + trailing_bits)) return TAMP_INPUT_EXHAUSTED; @@ -90,8 +90,8 @@ static tamp_res decode_huffman(uint32_t* bit_buffer, uint8_t* bit_buffer_pos, ui */ static tamp_res decode_rle(TampDecompressor* d, unsigned char** output, const unsigned char* output_end, size_t* output_written_size) { - uint16_t rle_count; - uint16_t skip = d->skip_bytes; + uint8_t rle_count; /* max 225: (13 << 4) + 15 + 2 */ + uint8_t skip = d->skip_bytes; if (skip > 0) { /* Resume from output-full: rle_count saved in pending_window_offset */ @@ -100,7 +100,7 @@ static tamp_res decode_rle(TampDecompressor* d, unsigned char** output, const un /* Fresh decode */ uint32_t bit_buffer = d->bit_buffer; uint8_t bit_buffer_pos = d->bit_buffer_pos; - uint16_t raw; + uint8_t raw; tamp_res res = decode_huffman(&bit_buffer, &bit_buffer_pos, TAMP_LEADING_RLE_BITS, &raw); if (res != TAMP_OK) 
return res; d->bit_buffer = bit_buffer; @@ -113,14 +113,14 @@ static tamp_res decode_rle(TampDecompressor* d, unsigned char** output, const un uint8_t symbol = d->window[prev_pos]; /* Calculate how many to write this call */ - uint16_t remaining_count = rle_count - skip; + uint8_t remaining_count = rle_count - skip; size_t output_space = output_end - *output; - uint16_t to_write; + uint8_t to_write; if (TAMP_UNLIKELY(remaining_count > output_space)) { /* Partial write - save state for resume */ to_write = output_space; - d->skip_bytes = skip + output_space; + d->skip_bytes = skip + to_write; d->token_state = TOKEN_RLE; d->pending_window_offset = rle_count; } else { @@ -185,7 +185,7 @@ static tamp_res decode_extended_match(TampDecompressor* d, unsigned char** outpu /* Fresh decode: huffman+trailing first, then window_offset */ uint32_t bit_buffer = d->bit_buffer; uint8_t bit_buffer_pos = d->bit_buffer_pos; - uint16_t raw; + uint8_t raw; tamp_res res = decode_huffman(&bit_buffer, &bit_buffer_pos, TAMP_LEADING_EXTENDED_MATCH_BITS, &raw); if (res != TAMP_OK) return res; match_size = raw + d->min_pattern_size + 12; @@ -457,10 +457,9 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor* decompressor, unsigne bit_buffer <<= 1; bit_buffer_pos--; - uint16_t match_size_u16; - if (decode_huffman(&bit_buffer, &bit_buffer_pos, 0, &match_size_u16) != TAMP_OK) - return TAMP_INPUT_EXHAUSTED; - match_size = match_size_u16; + uint8_t match_size_u8; + if (decode_huffman(&bit_buffer, &bit_buffer_pos, 0, &match_size_u8) != TAMP_OK) return TAMP_INPUT_EXHAUSTED; + match_size = match_size_u8; if (TAMP_UNLIKELY(match_size == FLUSH)) { // flush bit_buffer to the nearest byte and skip the remainder of decoding From e081f53506856c15d24b567682f1fc1c72d9aacb Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 24 Jan 2026 23:15:34 -0500 Subject: [PATCH 023/109] reduce some dtypes to uint8 --- tamp/_c_src/tamp/decompressor.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 
deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index 2c43c08e..b245895e 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -131,7 +131,7 @@ static tamp_res decode_rle(TampDecompressor* d, unsigned char** output, const un } /* Write repeated bytes to output */ - for (uint16_t i = 0; i < to_write; i++) { + for (uint8_t i = 0; i < to_write; i++) { *(*output)++ = symbol; } *output_written_size += to_write; @@ -141,8 +141,8 @@ static tamp_res decode_rle(TampDecompressor* d, unsigned char** output, const un if (skip == 0) { const uint16_t window_size = 1u << d->conf_window; uint16_t remaining = window_size - d->window_pos; - uint16_t window_write = MIN(MIN(rle_count, TAMP_RLE_MAX_WINDOW), remaining); - for (uint16_t i = 0; i < window_write; i++) { + uint8_t window_write = MIN(MIN(rle_count, TAMP_RLE_MAX_WINDOW), remaining); /* max 8 */ + for (uint8_t i = 0; i < window_write; i++) { d->window[d->window_pos++] = symbol; } d->window_pos &= (window_size - 1); @@ -166,8 +166,8 @@ static tamp_res decode_extended_match(TampDecompressor* d, unsigned char** outpu size_t* output_written_size) { const uint8_t conf_window = d->conf_window; uint16_t window_offset; - uint16_t match_size; - uint16_t skip = d->skip_bytes; + uint8_t match_size; /* max 126: (13<<3)+7 + 3 + 12 */ + uint8_t skip = d->skip_bytes; if (skip > 0) { /* Resume from output-full: both values saved */ @@ -214,9 +214,9 @@ static tamp_res decode_extended_match(TampDecompressor* d, unsigned char** outpu } /* Calculate how many to write this call */ - uint16_t remaining_count = match_size - skip; + uint8_t remaining_count = match_size - skip; size_t output_space = output_end - *output; - uint16_t to_write; + uint8_t to_write; if (TAMP_UNLIKELY(remaining_count > output_space)) { /* Partial write - save state for resume */ @@ -234,7 +234,7 @@ static tamp_res decode_extended_match(TampDecompressor* d, unsigned char** outpu /* Copy from window 
to output */ uint16_t src_offset = window_offset + skip; - for (uint16_t i = 0; i < to_write; i++) { + for (uint8_t i = 0; i < to_write; i++) { *(*output)++ = d->window[src_offset + i]; } *output_written_size += to_write; @@ -243,9 +243,9 @@ static tamp_res decode_extended_match(TampDecompressor* d, unsigned char** outpu * Write up to end of buffer (no wrap), mask wp only at the end. */ if (d->token_state == TOKEN_NONE) { uint16_t remaining = window_size - d->window_pos; - uint16_t window_write = (match_size < remaining) ? match_size : remaining; + uint8_t window_write = (match_size < remaining) ? match_size : remaining; /* max 126 */ uint16_t wp = d->window_pos; - for (uint16_t i = 0; i < window_write; i++) { + for (uint8_t i = 0; i < window_write; i++) { d->window[wp++] = d->window[window_offset + i]; } d->window_pos = wp & (window_size - 1); From 66bdc09c65ad6983dc381c8ffc1ce76b7ce86f0a Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sun, 25 Jan 2026 11:52:26 -0500 Subject: [PATCH 024/109] prep cython bindings for c-compressor-v2 --- tamp/_c_compressor.pyx | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tamp/_c_compressor.pyx b/tamp/_c_compressor.pyx index ae690257..8655d085 100644 --- a/tamp/_c_compressor.pyx +++ b/tamp/_c_compressor.pyx @@ -35,13 +35,10 @@ cdef class Compressor: int literal=8, dictionary=None, bool lazy_matching=False, - bool v2=False, + bool v2=True, ): cdef ctamp.TampConf conf - if v2: - raise NotImplementedError("v2 compression not yet supported in C compressor. 
Use --v1 flag or --implementation python.") - if dictionary and bit_size(len(dictionary) - 1) != window: raise ValueError("Dictionary-window size mismatch.") @@ -59,6 +56,7 @@ cdef class Compressor: # Set lazy_matching - this field is conditionally compiled based on TAMP_LAZY_MATCHING # The build system defines this macro, so the field should be available conf.lazy_matching = lazy_matching + conf.v2 = v2 self._window_buffer = dictionary if dictionary else bytearray(1 << window) self._window_buffer_ptr = self._window_buffer From bfe963162f881cc2b1754d3b3837ea90e70b6376 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sun, 25 Jan 2026 21:41:10 -0500 Subject: [PATCH 025/109] don't wrap extended match --- tamp/compressor.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tamp/compressor.py b/tamp/compressor.py index 5194515d..c3e880f8 100644 --- a/tamp/compressor.py +++ b/tamp/compressor.py @@ -128,8 +128,10 @@ def write_from_self(self, position, size): # Write up to end of buffer (no wrap) remaining = self.size - self.pos window_write = min(size, remaining) - for i in range(window_write): - self.buffer[self.pos] = self.buffer[position + i] + # Read source data first to avoid overlap when source and destination ranges overlap + data = self.get(position, window_write) + for byte in data: + self.buffer[self.pos] = byte self.pos += 1 self.pos %= self.size From 37c360813b3a20f6a97353c2eb00f54a32d5a9b7 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Mon, 26 Jan 2026 08:53:59 -0500 Subject: [PATCH 026/109] more robust window_copy --- tamp/_c_src/tamp/decompressor.c | 61 +++++++++++++++------------------ 1 file changed, 27 insertions(+), 34 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index b245895e..35d2c8a3 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -7,6 +7,30 @@ #define FLUSH 15 +/** + * @brief Copy pattern from window to window, updating window_pos. 
+ * + * Handles potential overlap between source and destination regions by + * copying backwards when the destination would "catch up" to the source. + */ +TAMP_NOINLINE static void window_copy(unsigned char* window, uint16_t* window_pos, uint16_t window_offset, + uint8_t match_size, uint16_t window_mask) { + const uint16_t src_to_dst = (*window_pos - window_offset) & window_mask; + + if (TAMP_UNLIKELY(src_to_dst < match_size && src_to_dst > 0)) { + /* Overlap with dst > src: copy backwards to avoid corruption. */ + for (uint8_t i = match_size; i-- > 0;) { + window[(*window_pos + i) & window_mask] = window[window_offset + i]; + } + *window_pos = (*window_pos + match_size) & window_mask; + } else { + for (uint8_t i = 0; i < match_size; i++) { + window[*window_pos] = window[window_offset + i]; + *window_pos = (*window_pos + 1) & window_mask; + } + } +} + #if TAMP_V2_DECOMPRESS /* Token state for v2 decode suspend/resume (2 bits). * TOKEN_RLE and TOKEN_EXT_MATCH_FRESH are arranged so that: @@ -240,50 +264,19 @@ static tamp_res decode_extended_match(TampDecompressor* d, unsigned char** outpu *output_written_size += to_write; /* Update window only on complete decode. - * Write up to end of buffer (no wrap), mask wp only at the end. */ + * Write up to end of buffer (no wrap). */ if (d->token_state == TOKEN_NONE) { uint16_t remaining = window_size - d->window_pos; uint8_t window_write = (match_size < remaining) ? match_size : remaining; /* max 126 */ uint16_t wp = d->window_pos; - for (uint8_t i = 0; i < window_write; i++) { - d->window[wp++] = d->window[window_offset + i]; - } - d->window_pos = wp & (window_size - 1); + window_copy(d->window, &wp, window_offset, window_write, window_size - 1); + d->window_pos = wp; } return (d->token_state == TOKEN_NONE) ? TAMP_OK : TAMP_OUTPUT_FULL; } #endif /* TAMP_V2_DECOMPRESS */ -/** - * @brief Copy pattern from window to window, updating window_pos. 
- * - * Handles potential overlap between source and destination regions by - * using a temporary buffer when necessary. Overlap occurs when the - * destination would "catch up" to the source during copying. - */ -static inline void window_copy(unsigned char* window, uint16_t* window_pos, uint16_t window_offset, uint8_t match_size, - uint16_t window_mask) { - const uint16_t src_to_dst = (*window_pos - window_offset) & window_mask; - const bool overlap = (src_to_dst < match_size) && (src_to_dst > 0); - - if (TAMP_UNLIKELY(overlap)) { - uint8_t tmp_buf[16]; - for (uint8_t i = 0; i < match_size; i++) { - tmp_buf[i] = window[window_offset + i]; - } - for (uint8_t i = 0; i < match_size; i++) { - window[*window_pos] = tmp_buf[i]; - *window_pos = (*window_pos + 1) & window_mask; - } - } else { - for (uint8_t i = 0; i < match_size; i++) { - window[*window_pos] = window[window_offset + i]; - *window_pos = (*window_pos + 1) & window_mask; - } - } -} - tamp_res tamp_decompressor_read_header(TampConf* conf, const unsigned char* input, size_t input_size, size_t* input_consumed_size) { if (input_consumed_size) (*input_consumed_size) = 0; From 19baefb30e021f751f09f48186a2f13c81badba5 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sat, 31 Jan 2026 22:10:22 -0500 Subject: [PATCH 027/109] simplify rle criteria --- tamp/compressor.py | 148 +++++++++++++++++++++++++-------------------- 1 file changed, 82 insertions(+), 66 deletions(-) diff --git a/tamp/compressor.py b/tamp/compressor.py index c3e880f8..b5e7c392 100644 --- a/tamp/compressor.py +++ b/tamp/compressor.py @@ -33,25 +33,6 @@ _LEADING_RLE_HUFFMAN_BITS = 4 -def _determine_rle_breakeven_point(min_pattern_size, window_bits): - # Determines if a pattern-match would be shorter than a RLE match. 
- # See how many bits this encoding would be with RLE - rle_length_bits = {} - for i in range(min_pattern_size, min_pattern_size + 11 + 1): - rle_length_bits[i] = 8 + _LEADING_RLE_HUFFMAN_BITS + _huffman_bits[(i - 1) >> _LEADING_RLE_HUFFMAN_BITS] - - pattern_length_bits = {} - for i in range(min_pattern_size, min_pattern_size + 11 + 1): - pattern_length_bits[i] = _huffman_bits[i - min_pattern_size] + window_bits - - breakeven_point = 0 - for pattern_size in sorted(pattern_length_bits): - if pattern_length_bits[pattern_size] < rle_length_bits[pattern_size]: - breakeven_point = pattern_size - - return breakeven_point - - class _BitWriter: """Writes bits to a stream.""" @@ -133,7 +114,8 @@ def write_from_self(self, position, size): for byte in data: self.buffer[self.pos] = byte self.pos += 1 - self.pos %= self.size + if self.pos == self.size: + self.pos = 0 def get(self, index, size): out = bytearray(size) @@ -198,7 +180,6 @@ def __init__( # "+1" Because a RLE of 1 is not valid. self._rle_max_size = (13 << _LEADING_RLE_HUFFMAN_BITS) + (1 << _LEADING_RLE_HUFFMAN_BITS) + 1 - self._rle_breakeven = _determine_rle_breakeven_point(self.min_pattern_size, self.window_bits) self._extended_match_count = 0 self._extended_match_position = 0 @@ -290,57 +271,92 @@ def _compress_input_buffer_single(self) -> int: # We ran out of input_buffer, return so caller can re-populate the input_buffer return bytes_written - target = bytes(self._input_buffer) - search_i = 0 - match_size = 1 + # RLE handling with persistent state (v2 only) + # Accumulate RLE count across compression cycles for better compression of long runs + have_match_from_rle = False # Track if we already did pattern matching in RLE section if self.v2: - # RLE same-character-counting logic - while ( - target and target[0] == self._window_buffer.last_written_byte and self._rle_count < self._rle_max_size - ): - self._rle_count += 1 - self._input_buffer.popleft() - target = bytes(self._input_buffer) - if not target and 
self._rle_count != self._rle_max_size: - # Need more input to see if the RLE continues + last_byte = self._window_buffer.last_written_byte + + # Count additional matching bytes in current buffer + new_rle_bytes = 0 + for byte in self._input_buffer: + if byte == last_byte and self._rle_count + new_rle_bytes < self._rle_max_size: + new_rle_bytes += 1 + else: + break + + # If we consumed whole buffer and haven't hit max, keep accumulating + if new_rle_bytes == len(self._input_buffer) and self._rle_count + new_rle_bytes < self._rle_max_size: + # Consume these bytes and wait for more data + for _ in range(new_rle_bytes): + self._input_buffer.popleft() + self._rle_count += new_rle_bytes return bytes_written - if self._rle_count == 1: - # This is not RLE; attempt to pattern-match or just write literals. - self._input_buffer.appendleft(self._window_buffer.last_written_byte) - target = bytes(self._input_buffer) - self._rle_count = 0 - elif self._rle_count: - if self._rle_count > self._rle_breakeven: - # It's certainly better to do a RLE write than searching for a pattern. 
- bytes_written += self._write_rle() + + # RLE run has ended or hit max - decide what to encode + total_rle_count = self._rle_count + new_rle_bytes + + if total_rle_count >= 2: + # Build search target: accumulated RLE + new RLE + rest of buffer + # This allows pattern matching to find longer sequences + target = bytes([last_byte]) * total_rle_count + bytes(list(self._input_buffer)[new_rle_bytes:]) + + # Do pattern search + if self.lazy_matching and self._cached_match_index >= 0: + search_i = self._cached_match_index + match_size = self._cached_match_size + match = self._window_buffer.get(search_i, match_size) + self._cached_match_index = -1 + else: + search_i, match = self._search(target, start=0) + match_size = len(match) + + have_match_from_rle = True + + # Simple decision: if pattern match is longer, use it; otherwise use RLE + if match_size >= self.min_pattern_size and match_size > total_rle_count: + # Pattern match wins + # Consume bytes from input_buffer (only the bytes actually in the buffer) + # Note: first _rle_count bytes were already consumed in previous calls + bytes_to_consume = match_size - self._rle_count + for _ in range(bytes_to_consume): + self._input_buffer.popleft() + self._rle_count = 0 + + # Write the pattern match immediately and return + # (Don't continue to normal flow which would try to consume bytes again) + if self.v2 and match_size > (self.min_pattern_size + 11): + self._extended_match_position = search_i + self._extended_match_count = match_size + bytes_written += self._write_extended_match() + else: + bytes_written += self._write_match(search_i, match) return bytes_written else: - # We'll see if pattern-matching offers a better encoding. 
- target = bytes([self._window_buffer.last_written_byte]) * self._rle_count - - # Check if we have a cached match from lazy matching - if self.lazy_matching and self._cached_match_index >= 0: - search_i = self._cached_match_index - match_size = self._cached_match_size - match = self._window_buffer.get(search_i, match_size) - self._cached_match_index = -1 # Clear cache after using - else: - # Perform normal pattern-matching - search_i, match = self._search(target, start=0) - match_size = len(match) - - if self._rle_count: - # Check to see if the found pattern-match is more efficient than the RLE encoding. - assert self._rle_count >= 2 # noqa: S101 - if match_size >= self._rle_count: - # Pattern is better than RLE - bytes_written += self._write_match(search_i, match) + # RLE wins - commit RLE + for _ in range(new_rle_bytes): + self._input_buffer.popleft() + self._rle_count = total_rle_count + bytes_written += self._write_rle() + return bytes_written + elif self._rle_count == 1: + # Single byte isn't worth RLE encoding self._rle_count = 0 - return bytes_written + # Fall through to normal pattern matching + + # Normal pattern matching (when no RLE or RLE was abandoned for pattern) + if not have_match_from_rle: + target = bytes(self._input_buffer) + + if self.lazy_matching and self._cached_match_index >= 0: + search_i = self._cached_match_index + match_size = self._cached_match_size + match = self._window_buffer.get(search_i, match_size) + self._cached_match_index = -1 else: - # RLE is better than pattern - return self._write_rle() + search_i, match = self._search(target, start=0) + match_size = len(match) # Lazy matching logic if ( @@ -476,7 +492,7 @@ def _write_rle(self) -> int: bytes_written += self._bit_writer.write_huffman_and_literal_flag(_RLE_SYMBOL) bytes_written += self._write_extended_huffman(self._rle_count - 2, _LEADING_RLE_HUFFMAN_BITS) - # Write up to 8 bytes (or until end of buffer) to the window. 
+ # Write up to 8 bytes to the window (up to end of buffer, no wrap). remaining = self._window_buffer.size - self._window_buffer.pos window_write = min(self._rle_count, _RLE_MAX_WINDOW, remaining) self._window_buffer.write_bytes(bytes([last_written_byte]) * window_write) From 5ae02824b7c631ff8d3e8c4541f23d918c543693 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sun, 1 Feb 2026 10:39:26 -0500 Subject: [PATCH 028/109] simplify window_copy call --- tamp/_c_src/tamp/decompressor.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index 35d2c8a3..d4b61f7a 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -263,13 +263,10 @@ static tamp_res decode_extended_match(TampDecompressor* d, unsigned char** outpu } *output_written_size += to_write; - /* Update window only on complete decode. - * Write up to end of buffer (no wrap). */ + /* Update window only on complete decode. */ if (d->token_state == TOKEN_NONE) { - uint16_t remaining = window_size - d->window_pos; - uint8_t window_write = (match_size < remaining) ? match_size : remaining; /* max 126 */ uint16_t wp = d->window_pos; - window_copy(d->window, &wp, window_offset, window_write, window_size - 1); + window_copy(d->window, &wp, window_offset, match_size, window_size - 1); d->window_pos = wp; } From 983532c35c59a2f6d7c5ec9d9ef3406d153a12b7 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sun, 1 Feb 2026 11:10:01 -0500 Subject: [PATCH 029/109] move window_copy to a better location. --- tamp/_c_src/tamp/decompressor.c | 48 ++++++++++++++++----------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index d4b61f7a..d419df11 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -7,30 +7,6 @@ #define FLUSH 15 -/** - * @brief Copy pattern from window to window, updating window_pos. 
- * - * Handles potential overlap between source and destination regions by - * copying backwards when the destination would "catch up" to the source. - */ -TAMP_NOINLINE static void window_copy(unsigned char* window, uint16_t* window_pos, uint16_t window_offset, - uint8_t match_size, uint16_t window_mask) { - const uint16_t src_to_dst = (*window_pos - window_offset) & window_mask; - - if (TAMP_UNLIKELY(src_to_dst < match_size && src_to_dst > 0)) { - /* Overlap with dst > src: copy backwards to avoid corruption. */ - for (uint8_t i = match_size; i-- > 0;) { - window[(*window_pos + i) & window_mask] = window[window_offset + i]; - } - *window_pos = (*window_pos + match_size) & window_mask; - } else { - for (uint8_t i = 0; i < match_size; i++) { - window[*window_pos] = window[window_offset + i]; - *window_pos = (*window_pos + 1) & window_mask; - } - } -} - #if TAMP_V2_DECOMPRESS /* Token state for v2 decode suspend/resume (2 bits). * TOKEN_RLE and TOKEN_EXT_MATCH_FRESH are arranged so that: @@ -57,6 +33,30 @@ static const uint8_t HUFFMAN_TABLE[128] = { 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17}; +/** + * @brief Copy pattern from window to window, updating window_pos. + * + * Handles potential overlap between source and destination regions by + * copying backwards when the destination would "catch up" to the source. + */ +TAMP_NOINLINE static void window_copy(unsigned char* window, uint16_t* window_pos, uint16_t window_offset, + uint8_t match_size, uint16_t window_mask) { + const uint16_t src_to_dst = (*window_pos - window_offset) & window_mask; + + if (TAMP_UNLIKELY(src_to_dst < match_size && src_to_dst > 0)) { + /* Overlap with dst > src: copy backwards to avoid corruption. 
*/ + for (uint8_t i = match_size; i-- > 0;) { + window[(*window_pos + i) & window_mask] = window[window_offset + i]; + } + *window_pos = (*window_pos + match_size) & window_mask; + } else { + for (uint8_t i = 0; i < match_size; i++) { + window[*window_pos] = window[window_offset + i]; + *window_pos = (*window_pos + 1) & window_mask; + } + } +} + /** * @brief Decode huffman symbol + optional trailing bits from bit buffer. * From 131c25a8552329892b9382d5910d77634c627db7 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sun, 1 Feb 2026 12:29:02 -0500 Subject: [PATCH 030/109] more comments --- tamp/_c_src/tamp/decompressor.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index d419df11..5a289232 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -38,13 +38,36 @@ static const uint8_t HUFFMAN_TABLE[128] = { * * Handles potential overlap between source and destination regions by * copying backwards when the destination would "catch up" to the source. + * + * IMPORTANT: Caller must validate that (window_offset + match_size) does not + * exceed window bounds before calling this function. This function assumes + * window_offset and match_size are pre-validated and does not perform + * bounds checking on source reads. + * + * @param window Circular buffer (size must be power of 2) + * @param window_pos Current write position (updated by this function) + * @param window_offset Source position to copy from + * @param match_size Number of bytes to copy + * @param window_mask Bitmask for wrapping (window_size - 1) */ TAMP_NOINLINE static void window_copy(unsigned char* window, uint16_t* window_pos, uint16_t window_offset, uint8_t match_size, uint16_t window_mask) { + /* Calculate distance from source to destination in circular buffer. + * src_to_dst = (dst - src) & mask gives the forward distance. 
*/ const uint16_t src_to_dst = (*window_pos - window_offset) & window_mask; + /* Critical overlap case: destination is AHEAD of source and they overlap. + * When dst > src by less than match_size, a forward copy corrupts data because + * we write to positions before reading from them. + * + * Example: src=100, dst=105, match_size=8 + * - Forward copy at i=5 would read window[105], but we already overwrote it at i=0! + * - Must copy in REVERSE order (end to start) to read source bytes before overwriting. + */ if (TAMP_UNLIKELY(src_to_dst < match_size && src_to_dst > 0)) { - /* Overlap with dst > src: copy backwards to avoid corruption. */ + /* Copy in reverse order: start from last byte, work backwards to first byte. + * This ensures we read all overlapping source bytes before they're overwritten. + * Destination wraps via mask; source doesn't need wrapping (pre-validated bounds). */ for (uint8_t i = match_size; i-- > 0;) { window[(*window_pos + i) & window_mask] = window[window_offset + i]; } From 57aeacb801f549601d1a846f6e607e299ee4eca8 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sun, 1 Feb 2026 13:56:41 -0500 Subject: [PATCH 031/109] cleanup decompressor v2 flag check. --- tamp/_c_src/tamp/decompressor.c | 12 +++++++----- tamp/_c_src/tamp/decompressor.h | 4 +--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index 5a289232..5222f4fb 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -286,10 +286,13 @@ static tamp_res decode_extended_match(TampDecompressor* d, unsigned char** outpu } *output_written_size += to_write; - /* Update window only on complete decode. */ + /* Update window only on complete decode. + * Write up to end of buffer (no wrap), matching RLE behavior. 
*/ if (d->token_state == TOKEN_NONE) { uint16_t wp = d->window_pos; - window_copy(d->window, &wp, window_offset, match_size, window_size - 1); + uint16_t remaining = window_size - wp; + uint8_t window_write = (match_size < remaining) ? match_size : remaining; + window_copy(d->window, &wp, window_offset, window_write, window_size - 1); d->window_pos = wp; } @@ -329,10 +332,9 @@ static tamp_res tamp_decompressor_populate_from_conf(TampDecompressor* decompres decompressor->conf_literal = conf_literal; decompressor->min_pattern_size = tamp_compute_min_pattern_size(conf_window, conf_literal); decompressor->configured = true; -#if TAMP_V2_DECOMPRESS decompressor->conf_v2 = conf_v2; -#else - (void)conf_v2; +#if !TAMP_V2_DECOMPRESS + if (conf_v2) return TAMP_INVALID_CONF; // v2 stream but v2 support not compiled in #endif return TAMP_OK; diff --git a/tamp/_c_src/tamp/decompressor.h b/tamp/_c_src/tamp/decompressor.h index 8e333ba8..7f1b5843 100644 --- a/tamp/_c_src/tamp/decompressor.h +++ b/tamp/_c_src/tamp/decompressor.h @@ -40,9 +40,7 @@ typedef struct { uint8_t conf_window : 4; // Window bits from config uint8_t conf_literal : 4; // Literal bits from config uint8_t min_pattern_size : 2; // Minimum pattern size, 2 or 3 -#if TAMP_V2_DECOMPRESS - uint8_t conf_v2 : 1; // v2 format enabled (from header) -#endif + uint8_t conf_v2 : 1; // v2 format enabled (from header) /* COLD: rarely accessed (init or edge cases). * Bitfields save space; add new cold fields here. */ From edb6ed41884d178a6964d6a6b396ece0724b8254 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sun, 1 Feb 2026 18:17:41 -0500 Subject: [PATCH 032/109] decompressor fix. 
--- tamp/_c_src/tamp/decompressor.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index 5222f4fb..29e4fb7f 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -431,7 +431,9 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor* decompressor, unsigne } if (res == TAMP_INPUT_EXHAUSTED) { refill_bit_buffer(decompressor, &input, input_end, input_consumed_size); - if (input == input_end) return TAMP_INPUT_EXHAUSTED; + /* Don't return yet - we may have refilled enough bits to continue. + * The loop condition will exit when both input is exhausted AND + * pos_and_state (including token_state) is zero. */ continue; } if (res != TAMP_OK) return res; From d2a6f3c4c6f7de55d075261a4f0af9fd2b2e4dfb Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Mon, 2 Feb 2026 09:05:09 -0500 Subject: [PATCH 033/109] fix infinite loop --- tamp/_c_src/tamp/decompressor.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index 29e4fb7f..00169998 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -430,10 +430,13 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor* decompressor, unsigne res = decode_extended_match(decompressor, &output, output_end, output_written_size); } if (res == TAMP_INPUT_EXHAUSTED) { + uint8_t old_bit_pos = decompressor->bit_buffer_pos; refill_bit_buffer(decompressor, &input, input_end, input_consumed_size); - /* Don't return yet - we may have refilled enough bits to continue. - * The loop condition will exit when both input is exhausted AND - * pos_and_state (including token_state) is zero. */ + /* If we couldn't get more bits and input is exhausted, stop. + * Otherwise the loop would run forever with token_state set. 
*/ + if (decompressor->bit_buffer_pos == old_bit_pos && input == input_end) { + return TAMP_INPUT_EXHAUSTED; + } continue; } if (res != TAMP_OK) return res; From f55772bdc9d15a986b9a50e2227bab6defea837a Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Mon, 2 Feb 2026 09:52:44 -0500 Subject: [PATCH 034/109] configure pytest in pyproject.toml --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index cf5b40ed..d2b2b717 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,6 +102,9 @@ unittest = [ ] uprofiler = "https://github.com/BrianPugh/micropython-libs/blob/main/lib/uprofiler.py" +[tool.pytest.ini_options] +testpaths = ["tests"] + [tool.coverage.run] branch = true omit = [ From 9d1b7858398fcb00692c122e13ee4c0ce7a5dd64 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Mon, 2 Feb 2026 09:56:32 -0500 Subject: [PATCH 035/109] initial v2 c compressor implementation; needs optimizing. --- tamp/_c_src/tamp/compressor.c | 363 +++++++++++++++++++++++++++++++++- tamp/_c_src/tamp/compressor.h | 20 ++ 2 files changed, 377 insertions(+), 6 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 0e06100f..7347bfa0 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -9,7 +9,13 @@ #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2 * !!(condition)])) +#if TAMP_V2_COMPRESS +// V2 max pattern: min_pattern_size + 11 + 112 = min_pattern_size + 123 +#define MAX_PATTERN_SIZE_V2 (compressor->min_pattern_size + 123) +#define MAX_PATTERN_SIZE (compressor->conf_v2 ? 
MAX_PATTERN_SIZE_V2 : (compressor->min_pattern_size + 13)) +#else #define MAX_PATTERN_SIZE (compressor->min_pattern_size + 13) +#endif #define WINDOW_SIZE (1 << compressor->conf_window) // 0xF because sizeof(TampCompressor.input) == 16; #define input_add(offset) ((compressor->input_pos + offset) & 0xF) @@ -23,23 +29,64 @@ static const uint8_t huffman_codes[] = {0x0, 0x3, 0x8, 0xb, 0x14, 0x24, 0x26, 0x // These bit lengths pre-add the 1 bit for the 0-value is_literal flag. static const uint8_t huffman_bits[] = {0x2, 0x3, 0x5, 0x5, 0x6, 0x7, 0x7, 0x7, 0x8, 0x8, 0x9, 0x9, 0x9, 0x7}; +#if TAMP_V2_COMPRESS +// V2: Maximum RLE count = (13 << 4) + 15 + 2 = 225 +#define RLE_MAX_COUNT 225 +// V2: Maximum extended match extra = (13 << 3) + 7 + 1 = 112 +// Total max match = min_pattern_size + 11 + 112 = min_pattern_size + 123 +#define EXTENDED_MATCH_MAX_EXTRA 112 +#endif + static inline void write_to_bit_buffer(TampCompressor *compressor, uint32_t bits, uint8_t n_bits) { compressor->bit_buffer_pos += n_bits; compressor->bit_buffer |= bits << (32 - compressor->bit_buffer_pos); } +#if TAMP_V2_COMPRESS +/** + * @brief Write extended huffman encoding (huffman + trailing bits). + * + * Used for both RLE count and extended match size encoding. + * + * @param[in,out] compressor Compressor with bit buffer. + * @param[in] value The value to encode. + * @param[in] trailing_bits Number of trailing bits (3 for extended match, 4 for RLE). + */ +static inline void write_extended_huffman(TampCompressor *compressor, uint8_t value, uint8_t trailing_bits) { + uint8_t mask = (1 << trailing_bits) - 1; + uint8_t code_index = value >> trailing_bits; + // Write huffman code without literal flag (subtract 1 from bit length) + write_to_bit_buffer(compressor, huffman_codes[code_index], huffman_bits[code_index] - 1); + write_to_bit_buffer(compressor, value & mask, trailing_bits); +} + +#endif // TAMP_V2_COMPRESS + /** * @brief Partially flush the internal bit buffer. 
* - * Up to 7 bits may remain in the internal bit buffer. + * @param[in] min_threshold Minimum bit_buffer_pos to trigger flush. Default 8 flushes whole bytes. + * Use 7 for extended match tokens which need up to 26 bits (32 - 26 = 6 max remaining). + * + * Up to (min_threshold - 1) bits may remain in the internal bit buffer. */ -static inline tamp_res partial_flush(TampCompressor *compressor, unsigned char *output, size_t output_size, - size_t *output_written_size) { - for (*output_written_size = output_size; compressor->bit_buffer_pos >= 8 && output_size; +static inline tamp_res partial_flush_threshold(TampCompressor *compressor, unsigned char *output, size_t output_size, + size_t *output_written_size, uint8_t min_threshold) { + for (*output_written_size = output_size; compressor->bit_buffer_pos >= min_threshold && output_size; output_size--, compressor->bit_buffer_pos -= 8, compressor->bit_buffer <<= 8) *output++ = compressor->bit_buffer >> 24; *output_written_size -= output_size; - return (compressor->bit_buffer_pos >= 8) ? TAMP_OUTPUT_FULL : TAMP_OK; + return (compressor->bit_buffer_pos >= min_threshold) ? TAMP_OUTPUT_FULL : TAMP_OK; +} + +/** + * @brief Partially flush the internal bit buffer. + * + * Up to 7 bits may remain in the internal bit buffer. 
+ */ +static inline tamp_res partial_flush(TampCompressor *compressor, unsigned char *output, size_t output_size, + size_t *output_written_size) { + return partial_flush_threshold(compressor, output, output_size, output_written_size, 8); } inline bool tamp_compressor_full(const TampCompressor *compressor) { @@ -142,6 +189,9 @@ tamp_res tamp_compressor_init(TampCompressor *compressor, const TampConf *conf, .use_custom_dictionary = false, #if TAMP_LAZY_MATCHING .lazy_matching = false, +#endif +#if TAMP_V2_COMPRESS + .v2 = true, // Default to v2 format #endif }; if (!conf) conf = &conf_default; @@ -157,6 +207,9 @@ tamp_res tamp_compressor_init(TampCompressor *compressor, const TampConf *conf, #if TAMP_LAZY_MATCHING compressor->conf_lazy_matching = conf->lazy_matching; #endif +#if TAMP_V2_COMPRESS + compressor->conf_v2 = conf->v2; +#endif compressor->window = window; compressor->min_pattern_size = tamp_compute_min_pattern_size(conf->window, conf->literal); @@ -171,12 +224,120 @@ tamp_res tamp_compressor_init(TampCompressor *compressor, const TampConf *conf, write_to_bit_buffer(compressor, conf->window - 8, 3); write_to_bit_buffer(compressor, conf->literal - 5, 2); write_to_bit_buffer(compressor, conf->use_custom_dictionary, 1); - write_to_bit_buffer(compressor, 0, 1); // Reserved +#if TAMP_V2_COMPRESS + write_to_bit_buffer(compressor, conf->v2, 1); // v2 format flag +#else + write_to_bit_buffer(compressor, 0, 1); // Reserved (v1 only) +#endif write_to_bit_buffer(compressor, 0, 1); // No more header bytes return TAMP_OK; } +#if TAMP_V2_COMPRESS +/** + * @brief Get the last byte written to the window. + */ +static inline uint8_t get_last_window_byte(TampCompressor *compressor) { + uint16_t prev_pos = (compressor->window_pos - 1) & ((1 << compressor->conf_window) - 1); + return compressor->window[prev_pos]; +} + +/** + * @brief Write RLE token to bit buffer and update window. + * + * @param[in,out] compressor Compressor state. 
+ * @param[in] count Number of repeated bytes (must be >= 2). + */ +static void write_rle_token(TampCompressor *compressor, uint8_t count) { + const uint16_t window_mask = (1 << compressor->conf_window) - 1; + uint8_t symbol = get_last_window_byte(compressor); + + // Write RLE symbol (12) with literal flag + // Note: symbols 12 and 13 are at indices 12 and 13 in huffman table (not offset by min_pattern_size) + write_to_bit_buffer(compressor, huffman_codes[TAMP_RLE_SYMBOL], huffman_bits[TAMP_RLE_SYMBOL]); + // Write extended huffman for count-2 + write_extended_huffman(compressor, count - 2, TAMP_LEADING_RLE_BITS); + + // Write up to TAMP_RLE_MAX_WINDOW bytes to window (or until buffer end, no wrap) + uint16_t remaining = WINDOW_SIZE - compressor->window_pos; + uint8_t window_write = MIN(MIN(count, TAMP_RLE_MAX_WINDOW), remaining); + for (uint8_t i = 0; i < window_write; i++) { + compressor->window[compressor->window_pos] = symbol; + compressor->window_pos = (compressor->window_pos + 1) & window_mask; + } +} + +/** + * @brief Write extended match token to bit buffer and update window. + * + * Extended match tokens can be up to 26 bits (7 symbol + 9 extended_huffman + 10 window). + * To avoid overflowing the 32-bit buffer when starting with up to 7 bits remaining, + * we do intermediate flushes between token parts. + * + * @param[in,out] compressor Compressor state. + * @param[out] output Output buffer for flushed bytes. + * @param[in] output_size Available space in output buffer. + * @param[out] output_written_size Bytes written to output. + * @return TAMP_OK on success, TAMP_OUTPUT_FULL if output buffer is too small. 
+ */ +static tamp_res write_extended_match_token(TampCompressor *compressor, unsigned char *output, size_t output_size, + size_t *output_written_size) { + const uint16_t window_mask = (1 << compressor->conf_window) - 1; + const uint8_t count = compressor->extended_match_count; + const uint16_t position = compressor->extended_match_position; + tamp_res res; + size_t flush_bytes; + + *output_written_size = 0; + + // Write extended match symbol (13) with literal flag (7 bits) + // Note: symbols 12 and 13 are at indices 12 and 13 in huffman table (not offset by min_pattern_size) + write_to_bit_buffer(compressor, huffman_codes[TAMP_EXTENDED_MATCH_SYMBOL], + huffman_bits[TAMP_EXTENDED_MATCH_SYMBOL]); + + // Flush to make room for extended huffman (up to 9 bits) + res = partial_flush(compressor, output, output_size, &flush_bytes); + *output_written_size += flush_bytes; + output += flush_bytes; + output_size -= flush_bytes; + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + + // Write extended huffman for (count - min_pattern_size - 11 - 1) + write_extended_huffman(compressor, count - compressor->min_pattern_size - 11 - 1, TAMP_LEADING_EXTENDED_MATCH_BITS); + + // Flush to make room for window position (10 bits) + res = partial_flush(compressor, output, output_size, &flush_bytes); + *output_written_size += flush_bytes; + output += flush_bytes; + output_size -= flush_bytes; + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + + // Write window position + write_to_bit_buffer(compressor, position, compressor->conf_window); + + // Flush any remaining complete bytes + res = partial_flush(compressor, output, output_size, &flush_bytes); + *output_written_size += flush_bytes; + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + + // Write to window (up to end of buffer, no wrap) + uint16_t remaining = WINDOW_SIZE - compressor->window_pos; + uint8_t window_write = MIN(count, remaining); + for (uint8_t i = 0; i < window_write; i++) { + compressor->window[compressor->window_pos] = 
compressor->window[position + i]; + compressor->window_pos++; + } + compressor->window_pos &= window_mask; + + // Reset extended match state + compressor->extended_match_count = 0; + compressor->extended_match_position = 0; + + return TAMP_OK; +} +#endif // TAMP_V2_COMPRESS + TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned char *output, size_t output_size, size_t *output_written_size) { tamp_res res; @@ -200,9 +361,132 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned if (TAMP_UNLIKELY(output_size == 0)) return TAMP_OUTPUT_FULL; +#if TAMP_V2_COMPRESS + // V2: Handle extended match continuation + if (TAMP_UNLIKELY(compressor->conf_v2 && compressor->extended_match_count)) { + // We're in extended match mode - try to extend the match using search-based extension + // (matching Python's behavior: search for current_pattern + next_byte in window) + const uint8_t max_ext_match = compressor->min_pattern_size + 11 + EXTENDED_MATCH_MAX_EXTRA; + const unsigned char *window = compressor->window; + + while (compressor->input_size > 0) { + // Check if extending would go beyond window buffer boundary (no wrap-around) + if (compressor->extended_match_position + compressor->extended_match_count >= WINDOW_SIZE) { + size_t token_bytes; + res = write_extended_match_token(compressor, output, output_size, &token_bytes); + (*output_written_size) += token_bytes; + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + return TAMP_OK; + } + + // Check if we've reached max extended match size + if (compressor->extended_match_count >= max_ext_match) { + size_t token_bytes; + res = write_extended_match_token(compressor, output, output_size, &token_bytes); + (*output_written_size) += token_bytes; + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + return TAMP_OK; + } + + // Search-based extension: search for pattern + next_byte in window + // This matches Python's _search behavior for extended match extension + const uint8_t 
current_count = compressor->extended_match_count; + const uint16_t current_pos = compressor->extended_match_position; + const uint8_t next_byte = read_input(0); + const uint8_t target_len = current_count + 1; + + // Search window for a position where target_len bytes match + // Start from current_pos and search forward + uint16_t best_pos = current_pos; + uint8_t best_len = 0; + + for (uint16_t search_pos = current_pos; search_pos + target_len <= WINDOW_SIZE; search_pos++) { + // Check if prefix matches + uint8_t match_len = 0; + for (uint8_t i = 0; i < current_count && match_len == i; i++) { + if (window[search_pos + i] == window[current_pos + i]) { + match_len = i + 1; + } + } + if (match_len < current_count) continue; // Current pattern doesn't match here + + // Check if next_byte also matches + if (window[search_pos + current_count] == next_byte) { + // Found a match of target_len bytes + best_pos = search_pos; + best_len = target_len; + break; // Take first match (same as Python's index()) + } + } + + if (best_len > current_count) { + // Found longer match - update position and count + compressor->extended_match_count = best_len; + compressor->extended_match_position = best_pos; + compressor->input_pos = input_add(1); + compressor->input_size--; + + if (compressor->extended_match_count >= max_ext_match) { + size_t token_bytes; + res = write_extended_match_token(compressor, output, output_size, &token_bytes); + (*output_written_size) += token_bytes; + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + return TAMP_OK; + } + } else { + // Match ended - emit current match + size_t token_bytes; + res = write_extended_match_token(compressor, output, output_size, &token_bytes); + (*output_written_size) += token_bytes; + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + return TAMP_OK; + } + } + // Ran out of input while extending - return and wait for more + return TAMP_OK; + } +#endif // TAMP_V2_COMPRESS + uint8_t match_size = 0; uint16_t match_index = 0; +#if 
TAMP_V2_COMPRESS + // V2: Handle RLE accumulation with persistent state + // For simplicity in C, we commit RLE immediately when the run ends + if (TAMP_UNLIKELY(compressor->conf_v2)) { + uint8_t last_byte = get_last_window_byte(compressor); + + // Count and CONSUME matching bytes + while (compressor->input_size > 0 && compressor->rle_count < RLE_MAX_COUNT) { + if (read_input(0) == last_byte) { + compressor->rle_count++; + compressor->input_pos = input_add(1); + compressor->input_size--; + } else { + break; + } + } + + // If we consumed whole buffer and haven't hit max, return (accumulate more) + if (compressor->input_size == 0 && compressor->rle_count < RLE_MAX_COUNT && compressor->rle_count > 0) { + return TAMP_OK; + } + + // RLE run has ended + if (compressor->rle_count >= 2) { + // Commit the RLE (simplified approach for C) + write_rle_token(compressor, compressor->rle_count); + compressor->rle_count = 0; + return TAMP_OK; + } else if (compressor->rle_count == 1) { + // Single byte - push it back to input for normal literal encoding + compressor->input_pos = input_add(-1); + compressor->input_size++; + compressor->rle_count = 0; + } + } +#endif // TAMP_V2_COMPRESS + #if TAMP_LAZY_MATCHING if (compressor->conf_lazy_matching) { // Check if we have a cached match from lazy matching @@ -250,6 +534,8 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned } else { // Use current match, clear cache compressor->cached_match_index = -1; + // Note: No V2 extended match check here - we're in the match_size <= 8 branch, + // so extended matches (which require match_size > min_pattern_size + 11) are impossible. 
uint8_t huffman_index = match_size - compressor->min_pattern_size; write_to_bit_buffer(compressor, huffman_codes[huffman_index], huffman_bits[huffman_index]); write_to_bit_buffer(compressor, match_index, compressor->conf_window); @@ -266,6 +552,17 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned } else { // Write TOKEN compressor->cached_match_index = -1; // Clear cache +#if TAMP_V2_COMPRESS + // V2: Check for extended match + if (compressor->conf_v2 && match_size > compressor->min_pattern_size + 11) { + compressor->extended_match_count = match_size; + compressor->extended_match_position = match_index; + // Consume matched bytes from input + compressor->input_pos = input_add(match_size); + compressor->input_size -= match_size; + return TAMP_OK; + } +#endif uint8_t huffman_index = match_size - compressor->min_pattern_size; write_to_bit_buffer(compressor, huffman_codes[huffman_index], huffman_bits[huffman_index]); write_to_bit_buffer(compressor, match_index, compressor->conf_window); @@ -283,6 +580,17 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned } write_to_bit_buffer(compressor, IS_LITERAL_FLAG | c, compressor->conf_literal + 1); } else { +#if TAMP_V2_COMPRESS + // V2: Check for extended match + if (compressor->conf_v2 && match_size > compressor->min_pattern_size + 11) { + compressor->extended_match_count = match_size; + compressor->extended_match_position = match_index; + // Consume matched bytes from input + compressor->input_pos = input_add(match_size); + compressor->input_size -= match_size; + return TAMP_OK; + } +#endif // Write TOKEN uint8_t huffman_index = match_size - compressor->min_pattern_size; write_to_bit_buffer(compressor, huffman_codes[huffman_index], huffman_bits[huffman_index]); @@ -375,6 +683,49 @@ tamp_res tamp_compressor_flush(TampCompressor *compressor, unsigned char *output output += chunk_output_written_size; } +#if TAMP_V2_COMPRESS + // V2: Flush any pending RLE + if 
(compressor->conf_v2 && compressor->rle_count >= 1) { + // Partial flush first to make room + res = partial_flush(compressor, output, output_size, &chunk_output_written_size); + (*output_written_size) += chunk_output_written_size; + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + output_size -= chunk_output_written_size; + output += chunk_output_written_size; + + if (compressor->rle_count == 1) { + // Single byte - write as literal (can't use RLE token for count < 2) + uint8_t literal = get_last_window_byte(compressor); + write_to_bit_buffer(compressor, IS_LITERAL_FLAG | literal, compressor->conf_literal + 1); + + // Write to window + const uint16_t window_mask = (1 << compressor->conf_window) - 1; + compressor->window[compressor->window_pos] = literal; + compressor->window_pos = (compressor->window_pos + 1) & window_mask; + } else { + // count >= 2: write as RLE token + write_rle_token(compressor, compressor->rle_count); + } + compressor->rle_count = 0; + + // Partial flush again after writing token + res = partial_flush(compressor, output, output_size, &chunk_output_written_size); + (*output_written_size) += chunk_output_written_size; + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + output_size -= chunk_output_written_size; + output += chunk_output_written_size; + } + + // V2: Flush any pending extended match + if (compressor->conf_v2 && compressor->extended_match_count) { + res = write_extended_match_token(compressor, output, output_size, &chunk_output_written_size); + (*output_written_size) += chunk_output_written_size; + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + output_size -= chunk_output_written_size; + output += chunk_output_written_size; + } +#endif + // Perform partial flush to see if we need a FLUSH token (check if output buffer in not empty), // and to subsequently make room for the FLUSH token. 
res = partial_flush(compressor, output, output_size, &chunk_output_written_size); diff --git a/tamp/_c_src/tamp/compressor.h b/tamp/_c_src/tamp/compressor.h index cd6ca1f0..91de900f 100644 --- a/tamp/_c_src/tamp/compressor.h +++ b/tamp/_c_src/tamp/compressor.h @@ -24,6 +24,9 @@ typedef struct TampCompressor { uint8_t conf_use_custom_dictionary; // Use a custom initialized dictionary. #if TAMP_LAZY_MATCHING uint8_t conf_lazy_matching; // Use lazy matching for better compression +#endif +#if TAMP_V2_COMPRESS + uint8_t conf_v2; // Use v2 format (RLE, extended match) #endif uint8_t min_pattern_size; @@ -32,6 +35,13 @@ typedef struct TampCompressor { int16_t cached_match_index; uint8_t cached_match_size; #endif + +#if TAMP_V2_COMPRESS + /* V2 state */ + uint8_t rle_count; // Current RLE run length (max 225) + uint8_t extended_match_count; // Current extended match size (max ~126) + uint16_t extended_match_position; // Window position for extended match +#endif #else // Use bitfields for reduced memory-usage /* Conf attributes */ uint32_t conf_window : 4; // number of window bits @@ -40,6 +50,9 @@ typedef struct TampCompressor { #if TAMP_LAZY_MATCHING uint32_t conf_lazy_matching : 1; // Use lazy matching for better compression #endif +#if TAMP_V2_COMPRESS + uint32_t conf_v2 : 1; // Use v2 format (RLE, extended match) +#endif /* Other small attributes */ uint32_t window_pos : 15; @@ -54,6 +67,13 @@ typedef struct TampCompressor { int16_t cached_match_index; uint8_t cached_match_size; #endif + +#if TAMP_V2_COMPRESS + /* V2 state */ + uint8_t rle_count; // Current RLE run length (max 225) + uint8_t extended_match_count; // Current extended match size (max ~126) + uint16_t extended_match_position; // Window position for extended match +#endif #endif // TAMP_ESP32 unsigned char input[16] /* __attribute__ ((aligned (16)))*/; uint32_t bit_buffer; From 775ee58f0f815d6c083f5fd3725dadc14f56362c Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Mon, 2 Feb 2026 10:26:29 -0500 Subject: 
[PATCH 036/109] v2-compressed datasets --- .gitattributes | 1 + .gitignore | 2 + .../RPI_PICO-20250415-v1.25.0.uf2.tamp | 3 + datasets/v2-compressed/dickens.tamp | 3 + datasets/v2-compressed/enwik8.tamp | 3 + datasets/v2-compressed/mozilla.tamp | 3 + datasets/v2-compressed/mr.tamp | 3 + datasets/v2-compressed/nci.tamp | 3 + datasets/v2-compressed/ooffice.tamp | 3 + datasets/v2-compressed/osdb.tamp | 3 + datasets/v2-compressed/reymont.tamp | 3 + datasets/v2-compressed/samba.tamp | 3 + datasets/v2-compressed/sao.tamp | 3 + datasets/v2-compressed/webster.tamp | 3 + datasets/v2-compressed/x-ray.tamp | 3 + datasets/v2-compressed/xml.tamp | 3 + tests/test_dataset_regression.py | 76 +++++++++++++++++++ 17 files changed, 121 insertions(+) create mode 100644 datasets/v2-compressed/RPI_PICO-20250415-v1.25.0.uf2.tamp create mode 100644 datasets/v2-compressed/dickens.tamp create mode 100644 datasets/v2-compressed/enwik8.tamp create mode 100644 datasets/v2-compressed/mozilla.tamp create mode 100644 datasets/v2-compressed/mr.tamp create mode 100644 datasets/v2-compressed/nci.tamp create mode 100644 datasets/v2-compressed/ooffice.tamp create mode 100644 datasets/v2-compressed/osdb.tamp create mode 100644 datasets/v2-compressed/reymont.tamp create mode 100644 datasets/v2-compressed/samba.tamp create mode 100644 datasets/v2-compressed/sao.tamp create mode 100644 datasets/v2-compressed/webster.tamp create mode 100644 datasets/v2-compressed/x-ray.tamp create mode 100644 datasets/v2-compressed/xml.tamp diff --git a/.gitattributes b/.gitattributes index 5f6c2223..530b7adf 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ datasets/v1-compressed/** filter=lfs diff=lfs merge=lfs -text +datasets/v2-compressed/** filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore index 4344bd22..34568b58 100644 --- a/.gitignore +++ b/.gitignore @@ -249,10 +249,12 @@ Temporary Items # Compression benchmark datasets datasets/* !datasets/v1-compressed/ 
+!datasets/v2-compressed/ enwik8* *.pkl *.tamp !datasets/v1-compressed/** +!datasets/v2-compressed/** # Cython-generated files tamp/_c_compressor.c diff --git a/datasets/v2-compressed/RPI_PICO-20250415-v1.25.0.uf2.tamp b/datasets/v2-compressed/RPI_PICO-20250415-v1.25.0.uf2.tamp new file mode 100644 index 00000000..1232a502 --- /dev/null +++ b/datasets/v2-compressed/RPI_PICO-20250415-v1.25.0.uf2.tamp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fb7efbdc484cbf400817074910275c3f2a89aec0ab96c8984fd58423e5e9290 +size 291036 diff --git a/datasets/v2-compressed/dickens.tamp b/datasets/v2-compressed/dickens.tamp new file mode 100644 index 00000000..a1ed82b9 --- /dev/null +++ b/datasets/v2-compressed/dickens.tamp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db20dcfa7e76829a809a4c9d253f1b4e53b294e86db789490bc4fadb19ab5dc0 +size 5538332 diff --git a/datasets/v2-compressed/enwik8.tamp b/datasets/v2-compressed/enwik8.tamp new file mode 100644 index 00000000..edf7491b --- /dev/null +++ b/datasets/v2-compressed/enwik8.tamp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24f3e70e49e7344ebbe3ab23d274910f7aff5bb1fb1212658b1f136a99d244f4 +size 51019055 diff --git a/datasets/v2-compressed/mozilla.tamp b/datasets/v2-compressed/mozilla.tamp new file mode 100644 index 00000000..b1ee2c59 --- /dev/null +++ b/datasets/v2-compressed/mozilla.tamp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cedba7ad7710757f3c5e7bf8176fd92eede9b8e5be2e8e697f9a6dc15d45718 +size 24415401 diff --git a/datasets/v2-compressed/mr.tamp b/datasets/v2-compressed/mr.tamp new file mode 100644 index 00000000..404f4170 --- /dev/null +++ b/datasets/v2-compressed/mr.tamp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a619cf3440c067f6bd5eebcf6b2145d583feca83067244c0c2585aecb4b3cae +size 4519929 diff --git a/datasets/v2-compressed/nci.tamp b/datasets/v2-compressed/nci.tamp new file mode 100644 
index 00000000..085b3bfa --- /dev/null +++ b/datasets/v2-compressed/nci.tamp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adf1eac94e48d44092a9e64a7e8d14e1452b357f176e2aba7ed26eb2b7340946 +size 6855616 diff --git a/datasets/v2-compressed/ooffice.tamp b/datasets/v2-compressed/ooffice.tamp new file mode 100644 index 00000000..d8243440 --- /dev/null +++ b/datasets/v2-compressed/ooffice.tamp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9376fe3e9f0286c1edbfafeadb1a8d997dd8524a32dbce7438636f270d61789 +size 3773219 diff --git a/datasets/v2-compressed/osdb.tamp b/datasets/v2-compressed/osdb.tamp new file mode 100644 index 00000000..5dac28be --- /dev/null +++ b/datasets/v2-compressed/osdb.tamp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b3188714fc3361691b684fecbff75b2d2cb9e6e690887aec456469d7505a586 +size 8466736 diff --git a/datasets/v2-compressed/reymont.tamp b/datasets/v2-compressed/reymont.tamp new file mode 100644 index 00000000..14072341 --- /dev/null +++ b/datasets/v2-compressed/reymont.tamp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e45bf12f0ca5971b47cfca38b2cc47216c93e13915b9b3ac19aa4195b9e87d5 +size 2818601 diff --git a/datasets/v2-compressed/samba.tamp b/datasets/v2-compressed/samba.tamp new file mode 100644 index 00000000..259d6ccc --- /dev/null +++ b/datasets/v2-compressed/samba.tamp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac74af80542ad88dd617b95047e2a77e820cfcac3ef17abb8904949b87fd605f +size 8386303 diff --git a/datasets/v2-compressed/sao.tamp b/datasets/v2-compressed/sao.tamp new file mode 100644 index 00000000..46c39e26 --- /dev/null +++ b/datasets/v2-compressed/sao.tamp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c05ac1c7d78b04874f07e10265cd254ecf9d6dcf1a3f0d1ea695815509ff0b1 +size 6136077 diff --git a/datasets/v2-compressed/webster.tamp b/datasets/v2-compressed/webster.tamp new file 
mode 100644 index 00000000..6c6835a9 --- /dev/null +++ b/datasets/v2-compressed/webster.tamp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1227df26234f9c9cf53d9be82d6a38ecd344db70bd6b25109a6e5ae1d4c1673f +size 18146647 diff --git a/datasets/v2-compressed/x-ray.tamp b/datasets/v2-compressed/x-ray.tamp new file mode 100644 index 00000000..981eb1dd --- /dev/null +++ b/datasets/v2-compressed/x-ray.tamp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba0c1fb79addae24888c12a466e84b73c32ca608836c458487226d224a63fc3 +size 7509449 diff --git a/datasets/v2-compressed/xml.tamp b/datasets/v2-compressed/xml.tamp new file mode 100644 index 00000000..67e68827 --- /dev/null +++ b/datasets/v2-compressed/xml.tamp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6adb3788d5644d28c56ea785ea4149a6f0b8ba5562c4f8dcc4a4ba8371920e97 +size 1473552 diff --git a/tests/test_dataset_regression.py b/tests/test_dataset_regression.py index 734401e6..448619f6 100644 --- a/tests/test_dataset_regression.py +++ b/tests/test_dataset_regression.py @@ -73,6 +73,66 @@ ), ] +# V2 format datasets (uses RLE and Extended Match encoding) +V2_DATASETS = [ + ( + "datasets/v2-compressed/RPI_PICO-20250415-v1.25.0.uf2.tamp", + "e0c40eacf1afc550a6add74888c48bb981b28788a6d75a62a0e2444e997b9864", + ), + ( + "datasets/v2-compressed/dickens.tamp", + "b24c37886142e11d0ee687db6ab06f936207aa7f2ea1fd1d9a36763c7a507e6a", + ), + ( + "datasets/v2-compressed/mr.tamp", + "68637ed52e3e4860174ed2dc0840ac77d5f1a60abbcb13770d5754e3774d53e6", + ), + ( + "datasets/v2-compressed/ooffice.tamp", + "e7ee013880d34dd5208283d0d3d91b07f442e067454276095ded14f322a656eb", + ), + ( + "datasets/v2-compressed/osdb.tamp", + "60f027179302ca3ad87c58ac90b6be72ec23588aaa7a3b7fe8ecc0f11def3fa3", + ), + ( + "datasets/v2-compressed/reymont.tamp", + "0eac0114a3dfe6e2ee1f345a0f79d653cb26c3bc9f0ed79238af4933422b7578", + ), + ( + "datasets/v2-compressed/sao.tamp", + 
"c2d0ea2cc59d4c21b7fe43a71499342a00cbe530a1d5548770e91ecd6214adcc", + ), + ( + "datasets/v2-compressed/x-ray.tamp", + "7de9fce1405dc44ae5e6813ed21cd5751e761bd4265655a005d39b9685d1c9ad", + ), + ( + "datasets/v2-compressed/xml.tamp", + "0e82e54e695c1938e4193448022543845b33020c8be6bf3bf3ead2224903e08c", + ), + ( + "datasets/v2-compressed/samba.tamp", + "93ba07bc44d8267789c1d911992f40b089ffa2140b4a160fac11ccae9a40e7b2", + ), + ( + "datasets/v2-compressed/nci.tamp", + "fc63a31770947b8c2062d3b19ca94c00485a232bb91b502021948fee983e1635", + ), + ( + "datasets/v2-compressed/webster.tamp", + "6a68f69b26daf09f9dd84f7470368553194a0b294fcfa80f1604efb11143a383", + ), + ( + "datasets/v2-compressed/mozilla.tamp", + "657fc3764b0c75ac9de9623125705831ebbfbe08fed248df73bc2dc66e2a963b", + ), + ( + "datasets/v2-compressed/enwik8.tamp", + "2b49720ec4d78c3c9fabaee6e4179a5e997302b3a70029f30f2d582218c024a8", + ), +] + class TestV1Decompression(unittest.TestCase): @pytest.mark.dataset @@ -90,5 +150,21 @@ def test_v1_decompress(self): self.assertEqual(actual, expected_sha256, f"SHA256 mismatch for {rel_path} using {impl_name}") +class TestV2Decompression(unittest.TestCase): + @pytest.mark.dataset + def test_v2_decompress(self): + for impl_name, decompress_func in DECOMPRESSOR_IMPLEMENTATIONS: + for rel_path, expected_sha256 in V2_DATASETS: + with self.subTest(implementation=impl_name, dataset=rel_path): + path = PROJECT_DIR / rel_path + + with open(path, "rb") as f: + data = f.read() + + decompressed = decompress_func(data) + actual = hashlib.sha256(decompressed).hexdigest() + self.assertEqual(actual, expected_sha256, f"SHA256 mismatch for {rel_path} using {impl_name}") + + if __name__ == "__main__": unittest.main() From b573f3aeca42a75c9b34ba9754e96136923165d4 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Mon, 2 Feb 2026 13:07:52 -0500 Subject: [PATCH 037/109] simplify pattern-extending. 
--- tamp/_c_src/tamp/compressor.c | 58 +++++++---------------------------- 1 file changed, 11 insertions(+), 47 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 7347bfa0..ed6791d8 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -364,14 +364,16 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned #if TAMP_V2_COMPRESS // V2: Handle extended match continuation if (TAMP_UNLIKELY(compressor->conf_v2 && compressor->extended_match_count)) { - // We're in extended match mode - try to extend the match using search-based extension - // (matching Python's behavior: search for current_pattern + next_byte in window) + // We're in extended match mode - try to extend the match at the current position const uint8_t max_ext_match = compressor->min_pattern_size + 11 + EXTENDED_MATCH_MAX_EXTRA; const unsigned char *window = compressor->window; while (compressor->input_size > 0) { + const uint16_t current_pos = compressor->extended_match_position; + const uint8_t current_count = compressor->extended_match_count; + // Check if extending would go beyond window buffer boundary (no wrap-around) - if (compressor->extended_match_position + compressor->extended_match_count >= WINDOW_SIZE) { + if (current_pos + current_count >= WINDOW_SIZE) { size_t token_bytes; res = write_extended_match_token(compressor, output, output_size, &token_bytes); (*output_written_size) += token_bytes; @@ -380,7 +382,7 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned } // Check if we've reached max extended match size - if (compressor->extended_match_count >= max_ext_match) { + if (current_count >= max_ext_match) { size_t token_bytes; res = write_extended_match_token(compressor, output, output_size, &token_bytes); (*output_written_size) += token_bytes; @@ -388,51 +390,13 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned return TAMP_OK; } - 
// Search-based extension: search for pattern + next_byte in window - // This matches Python's _search behavior for extended match extension - const uint8_t current_count = compressor->extended_match_count; - const uint16_t current_pos = compressor->extended_match_position; - const uint8_t next_byte = read_input(0); - const uint8_t target_len = current_count + 1; - - // Search window for a position where target_len bytes match - // Start from current_pos and search forward - uint16_t best_pos = current_pos; - uint8_t best_len = 0; - - for (uint16_t search_pos = current_pos; search_pos + target_len <= WINDOW_SIZE; search_pos++) { - // Check if prefix matches - uint8_t match_len = 0; - for (uint8_t i = 0; i < current_count && match_len == i; i++) { - if (window[search_pos + i] == window[current_pos + i]) { - match_len = i + 1; - } - } - if (match_len < current_count) continue; // Current pattern doesn't match here - - // Check if next_byte also matches - if (window[search_pos + current_count] == next_byte) { - // Found a match of target_len bytes - best_pos = search_pos; - best_len = target_len; - break; // Take first match (same as Python's index()) - } - } - - if (best_len > current_count) { - // Found longer match - update position and count - compressor->extended_match_count = best_len; - compressor->extended_match_position = best_pos; + // O(1) extension check: does the next byte at current position match input? 
+ if (window[current_pos + current_count] == read_input(0)) { + // Extension successful - consume input byte and increment count + compressor->extended_match_count++; compressor->input_pos = input_add(1); compressor->input_size--; - - if (compressor->extended_match_count >= max_ext_match) { - size_t token_bytes; - res = write_extended_match_token(compressor, output, output_size, &token_bytes); - (*output_written_size) += token_bytes; - if (TAMP_UNLIKELY(res != TAMP_OK)) return res; - return TAMP_OK; - } + // Continue to next iteration to try extending further } else { // Match ended - emit current match size_t token_bytes; From 391efdc6621fed18633d6fbe7af3209a7ba67fce Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Mon, 2 Feb 2026 13:28:46 -0500 Subject: [PATCH 038/109] dead code cleanup --- tamp/_c_src/tamp/compressor.c | 47 +++++++++-------------------------- 1 file changed, 12 insertions(+), 35 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index ed6791d8..5bb2ef9a 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -65,28 +65,15 @@ static inline void write_extended_huffman(TampCompressor *compressor, uint8_t va /** * @brief Partially flush the internal bit buffer. * - * @param[in] min_threshold Minimum bit_buffer_pos to trigger flush. Default 8 flushes whole bytes. - * Use 7 for extended match tokens which need up to 26 bits (32 - 26 = 6 max remaining). - * - * Up to (min_threshold - 1) bits may remain in the internal bit buffer. + * Flushes complete bytes from the bit buffer. Up to 7 bits may remain. 
*/ -static inline tamp_res partial_flush_threshold(TampCompressor *compressor, unsigned char *output, size_t output_size, - size_t *output_written_size, uint8_t min_threshold) { - for (*output_written_size = output_size; compressor->bit_buffer_pos >= min_threshold && output_size; +static inline tamp_res partial_flush(TampCompressor *compressor, unsigned char *output, size_t output_size, + size_t *output_written_size) { + for (*output_written_size = output_size; compressor->bit_buffer_pos >= 8 && output_size; output_size--, compressor->bit_buffer_pos -= 8, compressor->bit_buffer <<= 8) *output++ = compressor->bit_buffer >> 24; *output_written_size -= output_size; - return (compressor->bit_buffer_pos >= min_threshold) ? TAMP_OUTPUT_FULL : TAMP_OK; -} - -/** - * @brief Partially flush the internal bit buffer. - * - * Up to 7 bits may remain in the internal bit buffer. - */ -static inline tamp_res partial_flush(TampCompressor *compressor, unsigned char *output, size_t output_size, - size_t *output_written_size) { - return partial_flush_threshold(compressor, output, output_size, output_written_size, 8); + return (compressor->bit_buffer_pos >= 8) ? TAMP_OUTPUT_FULL : TAMP_OK; } inline bool tamp_compressor_full(const TampCompressor *compressor) { @@ -271,9 +258,8 @@ static void write_rle_token(TampCompressor *compressor, uint8_t count) { /** * @brief Write extended match token to bit buffer and update window. * - * Extended match tokens can be up to 26 bits (7 symbol + 9 extended_huffman + 10 window). - * To avoid overflowing the 32-bit buffer when starting with up to 7 bits remaining, - * we do intermediate flushes between token parts. + * Token format: symbol (7 bits) + extended_huffman (up to 11 bits) + window_pos (up to 15 bits) + * Total: up to 33 bits. We flush after symbol+huffman (18 bits max) to ensure window_pos fits. * * @param[in,out] compressor Compressor state. * @param[out] output Output buffer for flushed bytes. 
@@ -291,32 +277,23 @@ static tamp_res write_extended_match_token(TampCompressor *compressor, unsigned *output_written_size = 0; - // Write extended match symbol (13) with literal flag (7 bits) - // Note: symbols 12 and 13 are at indices 12 and 13 in huffman table (not offset by min_pattern_size) + // Write symbol (7 bits) + extended huffman (up to 11 bits) = 18 bits max + // With ≤7 bits already in buffer, total ≤25 bits - fits in 32-bit buffer write_to_bit_buffer(compressor, huffman_codes[TAMP_EXTENDED_MATCH_SYMBOL], huffman_bits[TAMP_EXTENDED_MATCH_SYMBOL]); - - // Flush to make room for extended huffman (up to 9 bits) - res = partial_flush(compressor, output, output_size, &flush_bytes); - *output_written_size += flush_bytes; - output += flush_bytes; - output_size -= flush_bytes; - if (TAMP_UNLIKELY(res != TAMP_OK)) return res; - - // Write extended huffman for (count - min_pattern_size - 11 - 1) write_extended_huffman(compressor, count - compressor->min_pattern_size - 11 - 1, TAMP_LEADING_EXTENDED_MATCH_BITS); - // Flush to make room for window position (10 bits) + // Flush to make room for window position (up to 15 bits) res = partial_flush(compressor, output, output_size, &flush_bytes); *output_written_size += flush_bytes; output += flush_bytes; output_size -= flush_bytes; if (TAMP_UNLIKELY(res != TAMP_OK)) return res; - // Write window position + // Write window position - with ≤7 bits remaining, up to 22 bits total - fits write_to_bit_buffer(compressor, position, compressor->conf_window); - // Flush any remaining complete bytes + // Final flush res = partial_flush(compressor, output, output_size, &flush_bytes); *output_written_size += flush_bytes; if (TAMP_UNLIKELY(res != TAMP_OK)) return res; From 73660854123d73a25624193d0acd95429f2ea4ce Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Mon, 2 Feb 2026 13:45:46 -0500 Subject: [PATCH 039/109] more simplification --- tamp/_c_src/tamp/compressor.c | 88 +++++++++++------------------------ 1 file changed, 27 
insertions(+), 61 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 5bb2ef9a..3ecbb033 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -465,78 +465,44 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned // literal and cache the next match if (next_match_size > match_size && validate_no_match_overlap(compressor->window_pos, next_match_index, next_match_size)) { - // Write LITERAL at current position - match_size = 1; - unsigned char c = read_input(0); - if (TAMP_UNLIKELY(c >> compressor->conf_literal)) { - return TAMP_EXCESS_BITS; - } - write_to_bit_buffer(compressor, IS_LITERAL_FLAG | c, compressor->conf_literal + 1); + // Force literal at current position, cache next match + match_size = 0; // Will trigger literal write below } else { - // Use current match, clear cache compressor->cached_match_index = -1; // Note: No V2 extended match check here - we're in the match_size <= 8 branch, // so extended matches (which require match_size > min_pattern_size + 11) are impossible. 
- uint8_t huffman_index = match_size - compressor->min_pattern_size; - write_to_bit_buffer(compressor, huffman_codes[huffman_index], huffman_bits[huffman_index]); - write_to_bit_buffer(compressor, match_index, compressor->conf_window); - } - } else if (TAMP_UNLIKELY(match_size < compressor->min_pattern_size)) { - // Write LITERAL - compressor->cached_match_index = -1; // Clear cache - match_size = 1; - unsigned char c = read_input(0); - if (TAMP_UNLIKELY(c >> compressor->conf_literal)) { - return TAMP_EXCESS_BITS; } - write_to_bit_buffer(compressor, IS_LITERAL_FLAG | c, compressor->conf_literal + 1); } else { - // Write TOKEN compressor->cached_match_index = -1; // Clear cache -#if TAMP_V2_COMPRESS - // V2: Check for extended match - if (compressor->conf_v2 && match_size > compressor->min_pattern_size + 11) { - compressor->extended_match_count = match_size; - compressor->extended_match_position = match_index; - // Consume matched bytes from input - compressor->input_pos = input_add(match_size); - compressor->input_size -= match_size; - return TAMP_OK; - } -#endif - uint8_t huffman_index = match_size - compressor->min_pattern_size; - write_to_bit_buffer(compressor, huffman_codes[huffman_index], huffman_bits[huffman_index]); - write_to_bit_buffer(compressor, match_index, compressor->conf_window); } - } else + } #endif - { - // Non-lazy matching path - if (TAMP_UNLIKELY(match_size < compressor->min_pattern_size)) { - // Write LITERAL - match_size = 1; - unsigned char c = read_input(0); - if (TAMP_UNLIKELY(c >> compressor->conf_literal)) { - return TAMP_EXCESS_BITS; - } - write_to_bit_buffer(compressor, IS_LITERAL_FLAG | c, compressor->conf_literal + 1); - } else { + + // Shared token/literal writing logic + if (TAMP_UNLIKELY(match_size < compressor->min_pattern_size)) { + // Write LITERAL + match_size = 1; + unsigned char c = read_input(0); + if (TAMP_UNLIKELY(c >> compressor->conf_literal)) { + return TAMP_EXCESS_BITS; + } + write_to_bit_buffer(compressor, 
IS_LITERAL_FLAG | c, compressor->conf_literal + 1); + } else { #if TAMP_V2_COMPRESS - // V2: Check for extended match - if (compressor->conf_v2 && match_size > compressor->min_pattern_size + 11) { - compressor->extended_match_count = match_size; - compressor->extended_match_position = match_index; - // Consume matched bytes from input - compressor->input_pos = input_add(match_size); - compressor->input_size -= match_size; - return TAMP_OK; - } -#endif - // Write TOKEN - uint8_t huffman_index = match_size - compressor->min_pattern_size; - write_to_bit_buffer(compressor, huffman_codes[huffman_index], huffman_bits[huffman_index]); - write_to_bit_buffer(compressor, match_index, compressor->conf_window); + // V2: Check for extended match + if (compressor->conf_v2 && match_size > compressor->min_pattern_size + 11) { + compressor->extended_match_count = match_size; + compressor->extended_match_position = match_index; + // Consume matched bytes from input + compressor->input_pos = input_add(match_size); + compressor->input_size -= match_size; + return TAMP_OK; } +#endif + // Write TOKEN + uint8_t huffman_index = match_size - compressor->min_pattern_size; + write_to_bit_buffer(compressor, huffman_codes[huffman_index], huffman_bits[huffman_index]); + write_to_bit_buffer(compressor, match_index, compressor->conf_window); } // Populate Window for (uint8_t i = 0; i < match_size; i++) { From 8c222e39f1609b541c098370086571b2faf724b1 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Mon, 2 Feb 2026 14:00:32 -0500 Subject: [PATCH 040/109] make window_pos a uint16 instead of a bitfield. 
--- tamp/_c_src/tamp/compressor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.h b/tamp/_c_src/tamp/compressor.h index 91de900f..4b3ccf54 100644 --- a/tamp/_c_src/tamp/compressor.h +++ b/tamp/_c_src/tamp/compressor.h @@ -55,13 +55,13 @@ typedef struct TampCompressor { #endif /* Other small attributes */ - uint32_t window_pos : 15; uint32_t bit_buffer_pos : 6; uint32_t min_pattern_size : 2; - uint32_t input_size : 5; uint32_t input_pos : 4; + uint16_t window_pos; // Window position (0 to window_size-1) + #if TAMP_LAZY_MATCHING /* Lazy matching cache */ int16_t cached_match_index; From c8c39c2e90dd6f92e9dad01816431e819e55300d Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Mon, 2 Feb 2026 14:13:34 -0500 Subject: [PATCH 041/109] decompress on-device-compression-benchmark results to check validity. --- Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 0949b15b..ad7a44be 100644 --- a/Makefile +++ b/Makefile @@ -227,7 +227,7 @@ define mpremote-sync fi endef -on-device-compression-benchmark: mpy build/enwik8-100kb build/enwik8-100kb.tamp +on-device-compression-benchmark: mpy build/enwik8-100kb $(MPREMOTE) rm :enwik8-100kb.tamp || true @# Remove any viper implementation that may exist from previous belay syncs $(MPREMOTE) rm :tamp/__init__.py :tamp/compressor_viper.py :tamp/decompressor_viper.py :tamp/compressor.py :tamp/decompressor.py :tamp/__main__.py :tamp/py.typed 2>/dev/null || true @@ -238,7 +238,8 @@ on-device-compression-benchmark: mpy build/enwik8-100kb build/enwik8-100kb.tamp $(MPREMOTE) soft-reset $(MPREMOTE) run tools/on-device-compression-benchmark.py $(MPREMOTE) cp :enwik8-100kb.tamp build/on-device-enwik8-100kb.tamp - cmp build/enwik8-100kb.tamp build/on-device-enwik8-100kb.tamp + poetry run tamp decompress build/on-device-enwik8-100kb.tamp -o build/on-device-enwik8-100kb-decompressed + cmp build/enwik8-100kb build/on-device-enwik8-100kb-decompressed 
@echo "Success!" on-device-decompression-benchmark: mpy build/enwik8-100kb.tamp From 0cba4eac0eb9ff9cbdb87f2ccfb4d8dd0f8cf8fa Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Mon, 2 Feb 2026 14:20:05 -0500 Subject: [PATCH 042/109] more firmware optimizations --- tamp/_c_src/tamp/compressor.c | 4 ++-- tamp/_c_src/tamp/compressor.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 3ecbb033..fa853df1 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -67,8 +67,8 @@ static inline void write_extended_huffman(TampCompressor *compressor, uint8_t va * * Flushes complete bytes from the bit buffer. Up to 7 bits may remain. */ -static inline tamp_res partial_flush(TampCompressor *compressor, unsigned char *output, size_t output_size, - size_t *output_written_size) { +static TAMP_NOINLINE tamp_res partial_flush(TampCompressor *compressor, unsigned char *output, size_t output_size, + size_t *output_written_size) { for (*output_written_size = output_size; compressor->bit_buffer_pos >= 8 && output_size; output_size--, compressor->bit_buffer_pos -= 8, compressor->bit_buffer <<= 8) *output++ = compressor->bit_buffer >> 24; diff --git a/tamp/_c_src/tamp/compressor.h b/tamp/_c_src/tamp/compressor.h index 4b3ccf54..aea698cc 100644 --- a/tamp/_c_src/tamp/compressor.h +++ b/tamp/_c_src/tamp/compressor.h @@ -55,12 +55,12 @@ typedef struct TampCompressor { #endif /* Other small attributes */ - uint32_t bit_buffer_pos : 6; uint32_t min_pattern_size : 2; uint32_t input_size : 5; uint32_t input_pos : 4; - uint16_t window_pos; // Window position (0 to window_size-1) + uint8_t bit_buffer_pos; // 6 bits used; bits pending in bit_buffer (0-32) + uint16_t window_pos; // 15 bits used; position in window (0 to window_size-1) #if TAMP_LAZY_MATCHING /* Lazy matching cache */ From 1d0649598d7be133acaa171316fd0c5373f56f3a Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Mon, 2 Feb 2026 
14:54:27 -0500 Subject: [PATCH 043/109] rework compressor struct. --- tamp/_c_src/tamp/compressor.c | 19 ++++---- tamp/_c_src/tamp/compressor.h | 88 ++++++++++++++++------------------- 2 files changed, 49 insertions(+), 58 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index fa853df1..a92c1704 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -184,6 +184,9 @@ tamp_res tamp_compressor_init(TampCompressor *compressor, const TampConf *conf, if (!conf) conf = &conf_default; if (conf->window < 8 || conf->window > 15) return TAMP_INVALID_CONF; if (conf->literal < 5 || conf->literal > 8) return TAMP_INVALID_CONF; +#if !TAMP_V2_COMPRESS + if (conf->v2) return TAMP_INVALID_CONF; // V2 requested but not compiled in +#endif for (uint8_t i = 0; i < sizeof(TampCompressor); i++) // Zero-out the struct ((unsigned char *)compressor)[i] = 0; @@ -191,12 +194,10 @@ tamp_res tamp_compressor_init(TampCompressor *compressor, const TampConf *conf, compressor->conf_literal = conf->literal; compressor->conf_window = conf->window; compressor->conf_use_custom_dictionary = conf->use_custom_dictionary; + compressor->conf_v2 = conf->v2; #if TAMP_LAZY_MATCHING compressor->conf_lazy_matching = conf->lazy_matching; #endif -#if TAMP_V2_COMPRESS - compressor->conf_v2 = conf->v2; -#endif compressor->window = window; compressor->min_pattern_size = tamp_compute_min_pattern_size(conf->window, conf->literal); @@ -208,14 +209,10 @@ tamp_res tamp_compressor_init(TampCompressor *compressor, const TampConf *conf, if (!compressor->conf_use_custom_dictionary) tamp_initialize_dictionary(window, (1 << conf->window)); // Write header to bit buffer - write_to_bit_buffer(compressor, conf->window - 8, 3); - write_to_bit_buffer(compressor, conf->literal - 5, 2); - write_to_bit_buffer(compressor, conf->use_custom_dictionary, 1); -#if TAMP_V2_COMPRESS - write_to_bit_buffer(compressor, conf->v2, 1); // v2 format flag -#else - 
write_to_bit_buffer(compressor, 0, 1); // Reserved (v1 only) -#endif + write_to_bit_buffer(compressor, compressor->conf_window - 8, 3); + write_to_bit_buffer(compressor, compressor->conf_literal - 5, 2); + write_to_bit_buffer(compressor, compressor->conf_use_custom_dictionary, 1); + write_to_bit_buffer(compressor, compressor->conf_v2, 1); write_to_bit_buffer(compressor, 0, 1); // No more header bytes return TAMP_OK; diff --git a/tamp/_c_src/tamp/compressor.h b/tamp/_c_src/tamp/compressor.h index aea698cc..361bf112 100644 --- a/tamp/_c_src/tamp/compressor.h +++ b/tamp/_c_src/tamp/compressor.h @@ -7,78 +7,72 @@ extern "C" { #include "common.h" -/* Externally, do not directly edit ANY of these attributes */ +/* Externally, do not directly edit ANY of these attributes. + * Fields are ordered by access frequency for cache efficiency. + */ typedef struct TampCompressor { - /* nicely aligned attributes */ - #if TAMP_ESP32 // Avoid bitfields for speed. - uint32_t window_pos; - uint32_t bit_buffer_pos; - - uint32_t input_size; - uint32_t input_pos; - - /* Conf attributes */ - uint8_t conf_window; // number of window bits - uint8_t conf_literal; // number of literal bits - uint8_t conf_use_custom_dictionary; // Use a custom initialized dictionary. 
-#if TAMP_LAZY_MATCHING - uint8_t conf_lazy_matching; // Use lazy matching for better compression -#endif -#if TAMP_V2_COMPRESS - uint8_t conf_v2; // Use v2 format (RLE, extended match) -#endif - uint8_t min_pattern_size; + /* HOT: accessed every iteration of the compression loop */ + unsigned char *window; // Pointer to window buffer + uint32_t bit_buffer; // Bit buffer for output (32 bits) + uint32_t window_pos; // Current position in window (15 bits used) + uint32_t bit_buffer_pos; // Bits currently in bit_buffer (6 bits used) + uint32_t input_size; // Bytes in input buffer (5 bits used; 0-16) + uint32_t input_pos; // Current position in input buffer (4 bits used; 0-15) + unsigned char input[16]; // Input ring buffer + /* WARM: read frequently, often cached in locals */ + uint8_t min_pattern_size; // Minimum pattern size (2 bits used; 2 or 3) + uint8_t conf_window; // Window bits (4 bits used; 8-15) + uint8_t conf_literal; // Literal bits (4 bits used; 5-8) + uint8_t conf_use_custom_dictionary; // Custom dictionary (1 bit used; init only) + uint8_t conf_v2; // V2 format enabled (1 bit used) #if TAMP_LAZY_MATCHING - /* Lazy matching cache */ - int16_t cached_match_index; + uint8_t conf_lazy_matching; // Lazy matching enabled (1 bit used) + int16_t cached_match_index; // Lazy matching cache uint8_t cached_match_size; #endif #if TAMP_V2_COMPRESS - /* V2 state */ + /* V2 state (only needed when V2 compression is enabled) */ uint8_t rle_count; // Current RLE run length (max 225) uint8_t extended_match_count; // Current extended match size (max ~126) uint16_t extended_match_position; // Window position for extended match #endif -#else // Use bitfields for reduced memory-usage - /* Conf attributes */ - uint32_t conf_window : 4; // number of window bits - uint32_t conf_literal : 4; // number of literal bits - uint32_t conf_use_custom_dictionary : 1; // Use a custom initialized dictionary. 
-#if TAMP_LAZY_MATCHING - uint32_t conf_lazy_matching : 1; // Use lazy matching for better compression -#endif -#if TAMP_V2_COMPRESS - uint32_t conf_v2 : 1; // Use v2 format (RLE, extended match) -#endif - /* Other small attributes */ - uint32_t min_pattern_size : 2; - uint32_t input_size : 5; - uint32_t input_pos : 4; +#else // Use bitfields for reduced memory-usage - uint8_t bit_buffer_pos; // 6 bits used; bits pending in bit_buffer (0-32) - uint16_t window_pos; // 15 bits used; position in window (0 to window_size-1) + /* HOT: accessed every iteration of the compression loop */ + unsigned char *window; // Pointer to window buffer + uint32_t bit_buffer; // Bit buffer for output (32 bits) + uint16_t window_pos; // Current position in window (15 bits used) + uint8_t bit_buffer_pos; // Bits currently in bit_buffer (6 bits used) + uint8_t input_size; // Bytes in input buffer (5 bits used; 0-16) + uint8_t input_pos; // Current position in input buffer (4 bits used; 0-15) + unsigned char input[16]; // Input ring buffer + /* WARM: read frequently, often cached in locals. 
+ * Bitfields: min_pattern_size(2) + conf_window(4) + conf_literal(4) + + * conf_use_custom_dictionary(1) + conf_v2(1) + conf_lazy_matching(1) = 13 bits + */ + uint8_t min_pattern_size : 2; // Minimum pattern size (2 or 3) + uint8_t conf_window : 4; // Window bits (8-15) + uint8_t conf_literal : 4; // Literal bits (5-8) + uint8_t conf_use_custom_dictionary : 1; // Custom dictionary (init only) + uint8_t conf_v2 : 1; // V2 format enabled #if TAMP_LAZY_MATCHING - /* Lazy matching cache */ - int16_t cached_match_index; + uint8_t conf_lazy_matching : 1; // Lazy matching enabled + int16_t cached_match_index; // Lazy matching cache uint8_t cached_match_size; #endif #if TAMP_V2_COMPRESS - /* V2 state */ uint8_t rle_count; // Current RLE run length (max 225) uint8_t extended_match_count; // Current extended match size (max ~126) uint16_t extended_match_position; // Window position for extended match #endif -#endif // TAMP_ESP32 - unsigned char input[16] /* __attribute__ ((aligned (16)))*/; - uint32_t bit_buffer; - unsigned char *window; +#endif // TAMP_ESP32 } TampCompressor; /** From e51f184770e339519f2b1a4c9a6d521057645044 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Mon, 2 Feb 2026 14:58:37 -0500 Subject: [PATCH 044/109] update mpy_bindings with v2 flag. 
--- mpy_bindings/bindings.c | 1 + mpy_bindings/bindings_compressor.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/mpy_bindings/bindings.c b/mpy_bindings/bindings.c index d75d559e..2841e5cc 100644 --- a/mpy_bindings/bindings.c +++ b/mpy_bindings/bindings.c @@ -67,6 +67,7 @@ static mp_obj_t compressor_make_new(const mp_obj_type_t *type, size_t n_args, si .window = mp_obj_get_int(args_in[1]), .literal = mp_obj_get_int(args_in[2]), .use_custom_dictionary = mp_obj_get_int(args_in[4]), + .v2 = mp_obj_get_int(args_in[5]), }; mp_obj_compressor_t *o = mp_obj_malloc(mp_obj_compressor_t, type); diff --git a/mpy_bindings/bindings_compressor.py b/mpy_bindings/bindings_compressor.py index 414ae6a4..c40b2351 100644 --- a/mpy_bindings/bindings_compressor.py +++ b/mpy_bindings/bindings_compressor.py @@ -9,6 +9,7 @@ def __init__( window=10, literal=8, dictionary=None, + v2=True, ): self._cf = False # shorter name to save binary space if not hasattr(f, "write"): # It's probably a path-like object. 
@@ -18,7 +19,7 @@ def __init__( custom = dictionary is not None if not dictionary: dictionary = bytearray(1 << window) - self._c = _C(f, window, literal, dictionary, custom) + self._c = _C(f, window, literal, dictionary, custom, v2) self.write = self._c.write From 961803a55da94c621b5e1f0c63f9d78ba90a4fb4 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Mon, 2 Feb 2026 18:38:36 -0500 Subject: [PATCH 045/109] prevent TAMP_COMPRESS_FULL for extended-match --- mpy_bindings/bindings.c | 3 ++- tamp/_c_compressor.pyx | 2 +- tamp/_c_src/tamp/compressor.c | 14 ++++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/mpy_bindings/bindings.c b/mpy_bindings/bindings.c index 2841e5cc..9f07ae39 100644 --- a/mpy_bindings/bindings.c +++ b/mpy_bindings/bindings.c @@ -9,7 +9,8 @@ **********/ #include "tamp/common.h" -#define CHUNK_SIZE 32 // Must be <= 65535 +#define CHUNK_SIZE 32 // Must be >= 32 and <= 65535 +_Static_assert(CHUNK_SIZE >= 32, "CHUNK_SIZE must be >= 32 to hold flush output"); #define mp_type_bytearray (*(mp_obj_type_t *)(mp_load_global(MP_QSTR_bytearray))) static void TAMP_CHECK(tamp_res res) { diff --git a/tamp/_c_compressor.pyx b/tamp/_c_compressor.pyx index 8655d085..eff25276 100644 --- a/tamp/_c_compressor.pyx +++ b/tamp/_c_compressor.pyx @@ -104,7 +104,7 @@ cdef class Compressor: cpdef int flush(self, bool write_token = True) except -1: cdef ctamp.tamp_res res - cdef bytearray buffer = bytearray(24) + cdef bytearray buffer = bytearray(32) cdef size_t output_written_size = 0 res = ctamp.tamp_compressor_flush( diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index a92c1704..9b282a15 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -35,6 +35,12 @@ static const uint8_t huffman_bits[] = {0x2, 0x3, 0x5, 0x5, 0x6, 0x7, 0x7, 0x7, 0 // V2: Maximum extended match extra = (13 << 3) + 7 + 1 = 112 // Total max match = min_pattern_size + 11 + 112 = min_pattern_size + 123 #define 
EXTENDED_MATCH_MAX_EXTRA 112 + +// Minimum output buffer space required for extended match token. +// Extended match: symbol (7 bits) + extended huffman (11 bits) + window pos (15 bits) = 33 bits. +// With 7 bits in bit buffer, need up to 40 bits = 5 bytes. Add 1 byte margin. +// Pre-checking prevents OUTPUT_FULL mid-token, which would corrupt bit_buffer on retry. +#define EXTENDED_MATCH_MIN_OUTPUT_BYTES 6 #endif static inline void write_to_bit_buffer(TampCompressor *compressor, uint32_t bits, uint8_t n_bits) { @@ -348,6 +354,8 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned // Check if extending would go beyond window buffer boundary (no wrap-around) if (current_pos + current_count >= WINDOW_SIZE) { + // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) + if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; size_t token_bytes; res = write_extended_match_token(compressor, output, output_size, &token_bytes); (*output_written_size) += token_bytes; @@ -357,6 +365,8 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned // Check if we've reached max extended match size if (current_count >= max_ext_match) { + // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) + if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; size_t token_bytes; res = write_extended_match_token(compressor, output, output_size, &token_bytes); (*output_written_size) += token_bytes; @@ -373,6 +383,8 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned // Continue to next iteration to try extending further } else { // Match ended - emit current match + // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) + if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; size_t token_bytes; res = 
write_extended_match_token(compressor, output, output_size, &token_bytes); (*output_written_size) += token_bytes; @@ -622,6 +634,8 @@ tamp_res tamp_compressor_flush(TampCompressor *compressor, unsigned char *output // V2: Flush any pending extended match if (compressor->conf_v2 && compressor->extended_match_count) { + // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) + if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; res = write_extended_match_token(compressor, output, output_size, &chunk_output_written_size); (*output_written_size) += chunk_output_written_size; if (TAMP_UNLIKELY(res != TAMP_OK)) return res; From 3f4666322ceaf3892071798dfd6b874e6d3abad4 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Mon, 2 Feb 2026 18:43:59 -0500 Subject: [PATCH 046/109] don't inline rle/extended-match --- tamp/_c_src/tamp/compressor.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 9b282a15..ad9768aa 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -58,7 +58,7 @@ static inline void write_to_bit_buffer(TampCompressor *compressor, uint32_t bits * @param[in] value The value to encode. * @param[in] trailing_bits Number of trailing bits (3 for extended match, 4 for RLE). */ -static inline void write_extended_huffman(TampCompressor *compressor, uint8_t value, uint8_t trailing_bits) { +static TAMP_NOINLINE void write_extended_huffman(TampCompressor *compressor, uint8_t value, uint8_t trailing_bits) { uint8_t mask = (1 << trailing_bits) - 1; uint8_t code_index = value >> trailing_bits; // Write huffman code without literal flag (subtract 1 from bit length) @@ -239,7 +239,7 @@ static inline uint8_t get_last_window_byte(TampCompressor *compressor) { * @param[in,out] compressor Compressor state. * @param[in] count Number of repeated bytes (must be >= 2). 
*/ -static void write_rle_token(TampCompressor *compressor, uint8_t count) { +static TAMP_NOINLINE void write_rle_token(TampCompressor *compressor, uint8_t count) { const uint16_t window_mask = (1 << compressor->conf_window) - 1; uint8_t symbol = get_last_window_byte(compressor); @@ -270,8 +270,8 @@ static void write_rle_token(TampCompressor *compressor, uint8_t count) { * @param[out] output_written_size Bytes written to output. * @return TAMP_OK on success, TAMP_OUTPUT_FULL if output buffer is too small. */ -static tamp_res write_extended_match_token(TampCompressor *compressor, unsigned char *output, size_t output_size, - size_t *output_written_size) { +static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor *compressor, unsigned char *output, + size_t output_size, size_t *output_written_size) { const uint16_t window_mask = (1 << compressor->conf_window) - 1; const uint8_t count = compressor->extended_match_count; const uint16_t position = compressor->extended_match_position; From 02c596e4d8cf9d6681d3159afd287e106c2c639d Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Mon, 2 Feb 2026 19:34:51 -0500 Subject: [PATCH 047/109] Add .clangd to the gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 34568b58..2bac59d1 100644 --- a/.gitignore +++ b/.gitignore @@ -437,6 +437,9 @@ wasm/build/ *.swo *~ +# clangd (C/C++ language server) +.clangd + # Emacs *~ \#*\# From fe16ce9ccb55174794ff3b16a3790b6c2f3ce0eb Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Mon, 2 Feb 2026 19:35:29 -0500 Subject: [PATCH 048/109] Add v2 option to wasm --- wasm/src/tamp.d.ts | 3 +++ wasm/src/tamp.js | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/wasm/src/tamp.d.ts b/wasm/src/tamp.d.ts index 584e2c43..b2c11fac 100644 --- a/wasm/src/tamp.d.ts +++ b/wasm/src/tamp.d.ts @@ -43,6 +43,8 @@ export interface TampOptions { literal?: number; /** Custom dictionary data. If null, no custom dictionary is used. 
If Uint8Array, uses the provided dictionary. Default: null */ dictionary?: Uint8Array | null; + /** Enable v2 format (RLE, extended match) for better compression ratios. Default: true */ + v2?: boolean; /** Enable lazy matching for better compression ratios. Default: false */ lazy_matching?: boolean; } @@ -68,6 +70,7 @@ export interface TampDefaults { readonly window: 10; readonly literal: 8; readonly dictionary: null; + readonly v2: true; readonly lazy_matching: false; } diff --git a/wasm/src/tamp.js b/wasm/src/tamp.js index f52788db..3de7c235 100644 --- a/wasm/src/tamp.js +++ b/wasm/src/tamp.js @@ -122,6 +122,7 @@ export class TampCompressor { window: 10, literal: 8, dictionary: null, + v2: true, lazy_matching: false, ...options, }; @@ -183,7 +184,8 @@ export class TampCompressor { (this.options.window & 0xf) | ((this.options.literal & 0xf) << 4) | ((this.options.dictionary ? 1 : 0) << 8) | - ((this.options.lazy_matching ? 1 : 0) << 9); + ((this.options.v2 ? 1 : 0) << 9) | + ((this.options.lazy_matching ? 
1 : 0) << 10); this.module.setValue(confPtr, confValue, 'i32'); // Initialize compressor @@ -790,10 +792,11 @@ export async function compress(data, options = {}) { const callbackOptions = {}; // Extract compression-specific options - const { window, literal, dictionary, lazy_matching, onPoll, signal, pollIntervalMs, pollIntervalBytes } = options; + const { window, literal, dictionary, v2, lazy_matching, onPoll, signal, pollIntervalMs, pollIntervalBytes } = options; if (window !== undefined) compressionOptions.window = window; if (literal !== undefined) compressionOptions.literal = literal; if (dictionary !== undefined) compressionOptions.dictionary = dictionary; + if (v2 !== undefined) compressionOptions.v2 = v2; if (lazy_matching !== undefined) compressionOptions.lazy_matching = lazy_matching; // Extract callback options From 41b4b13419bd89a017f7e31b3f1b972b9f1e5ea8 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Mon, 2 Feb 2026 19:42:55 -0500 Subject: [PATCH 049/109] Add v2 to the website. --- website/index.html | 22 +++++++++++++++++++++- website/js/main.js | 14 +++++++++++--- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/website/index.html b/website/index.html index 88474519..9fb60465 100644 --- a/website/index.html +++ b/website/index.html @@ -144,6 +144,16 @@

Configuration +
+ + +
+ ? +
+ Enables v2 compression format with RLE and extended match encoding for better compression ratios. Recommended for most use cases. +
+
+
@@ -182,6 +192,16 @@

Configuration

+
+ + +
+ ? +
+ Enables v2 compression format with RLE and extended match encoding for better compression ratios. Recommended for most use cases. +
+
+
@@ -194,7 +214,7 @@

Configuration

- +
- - + +
?
- Enables v2 compression format with RLE and extended match encoding for better compression ratios. Recommended for most use cases. + Enables extended compression format with RLE and extended match encoding for better compression ratios. Recommended for most use cases.
@@ -193,12 +193,12 @@

Configuration

- - + +
?
- Enables v2 compression format with RLE and extended match encoding for better compression ratios. Recommended for most use cases. + Enables extended compression format with RLE and extended match encoding for better compression ratios. Recommended for most use cases.
diff --git a/website/js/main.js b/website/js/main.js index 15458ab2..58ea6e03 100644 --- a/website/js/main.js +++ b/website/js/main.js @@ -23,10 +23,10 @@ let dropZone, compressionOptions, textMode, windowBitsSelect, - v2Checkbox, + extendedCheckbox, lazyMatchingCheckbox, textWindowBitsSelect, - textV2Checkbox, + textExtendedCheckbox, textLazyMatchingCheckbox, plainTextArea, compressedTextArea, @@ -89,10 +89,10 @@ document.addEventListener('DOMContentLoaded', () => { compressionOptions = document.getElementById('compressionOptions'); textMode = document.getElementById('textMode'); windowBitsSelect = document.getElementById('windowBits'); - v2Checkbox = document.getElementById('v2'); + extendedCheckbox = document.getElementById('extended'); lazyMatchingCheckbox = document.getElementById('lazyMatching'); textWindowBitsSelect = document.getElementById('textWindowBits'); - textV2Checkbox = document.getElementById('textV2'); + textExtendedCheckbox = document.getElementById('textExtended'); textLazyMatchingCheckbox = document.getElementById('textLazyMatching'); plainTextArea = document.getElementById('plainText'); compressedTextArea = document.getElementById('compressedText'); @@ -352,7 +352,7 @@ async function processFiles() { const windowBits = parseInt(windowBitsSelect.value); const options = { window: windowBits, - v2: v2Checkbox.checked, + extended: extendedCheckbox.checked, // Add progress callback for compression with overall progress calculation onPoll: async progressInfo => { const bytesProcessed = progressInfo.bytesProcessed || 0; @@ -588,7 +588,7 @@ async function compressTextContent() { try { const options = { window: windowBits, - v2: textV2Checkbox.checked, + extended: textExtendedCheckbox.checked, // Add progress callback for text compression onPoll: async progressInfo => { const bytesProcessed = progressInfo.bytesProcessed || 0; @@ -630,7 +630,7 @@ async function compressTextContent() { const ratio = data.length > 0 ? 
(data.length / compressed.length).toFixed(2) : '0'; const savings = data.length > 0 ? ((1 - compressed.length / data.length) * 100).toFixed(1) : '0'; - const configStr = `${windowBits}-bit window${textV2Checkbox.checked ? ', v2' : ', v1'}${ + const configStr = `${windowBits}-bit window${textExtendedCheckbox.checked ? ', extended' : ', basic'}${ isPureAscii ? ', 7-bit literals' : '' }${textLazyMatchingCheckbox.checked ? ', lazy matching' : ''}${ dictionaryValidation.dictionaryBytes ? ', custom dictionary' : '' From a0a75434a20fc1c1ccb2983f7fb8a3ea06282ddf Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Tue, 3 Feb 2026 09:32:06 -0500 Subject: [PATCH 051/109] Change cppcheck to use local --- .pre-commit-config.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1c1cbfcb..686769a3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -57,10 +57,13 @@ repos: args: ['-style=file', '-i'] exclude: ^espidf/tamp/private/tamp_search\.hpp$ - - repo: https://github.com/pocc/pre-commit-hooks - rev: v1.3.5 + - repo: local hooks: - id: cppcheck + name: cppcheck + entry: cppcheck + language: system + files: \.(c|h|cpp|hpp)$ exclude: ^(espidf|mpy_bindings|ctests|tools)/ args: [ '-Itamp/_c_src', From 0652b98214965247cc0c13bccfa1e24c07e5c312 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Tue, 3 Feb 2026 11:12:11 -0500 Subject: [PATCH 052/109] Add singular TAMP_EXTENDED flag; better document the compile-time flags. 
--- Makefile | 2 +- docs/source/c_library.rst | 55 +++++++++++++++++++++++++++++++++++++++ tamp/_c_src/tamp/common.h | 12 ++++++--- 3 files changed, 65 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 89a12d92..b9833532 100644 --- a/Makefile +++ b/Makefile @@ -504,7 +504,7 @@ C_SRC_COMP = tamp/_c_src/tamp/compressor.c C_SRC_DECOMP = tamp/_c_src/tamp/decompressor.c # Flags to disable extended format support -NO_EXTENDED_FLAGS = -DTAMP_EXTENDED_COMPRESS=0 -DTAMP_EXTENDED_DECOMPRESS=0 +NO_EXTENDED_FLAGS = -DTAMP_EXTENDED=0 c-size-no-extended: @rm -rf build/arm && mkdir -p build/arm diff --git a/docs/source/c_library.rst b/docs/source/c_library.rst index dadf934d..42e950a6 100644 --- a/docs/source/c_library.rst +++ b/docs/source/c_library.rst @@ -5,6 +5,61 @@ C Library Tamp provides a C library optimized for low-memory-usage, fast runtime, and small binary footprint. This page describes how to use the provided library. +Compile-Time Flags +^^^^^^^^^^^^^^^^^^ +Tamp's C library can be customized via compile-time flags to control features, code size, and performance. +Pass these flags to your compiler (e.g., ``-DTAMP_STREAM=0``). + ++----------------------------------+-------------------+------------------------------------------------------------------------------+ +| Flag | Default | Description | ++==================================+===================+==============================================================================+ +| ``TAMP_EXTENDED`` | ``1`` | Default value for extended format support (RLE, extended match encoding). | +| | | Set to ``0`` to disable extended support in both compressor and decompressor.| ++----------------------------------+-------------------+------------------------------------------------------------------------------+ +| ``TAMP_EXTENDED_COMPRESS`` | ``TAMP_EXTENDED`` | Enable extended format compression. 
Defaults to ``TAMP_EXTENDED`` but can | +| | | be individually overridden for compressor-only or decompressor-only builds. | ++----------------------------------+-------------------+------------------------------------------------------------------------------+ +| ``TAMP_EXTENDED_DECOMPRESS`` | ``TAMP_EXTENDED`` | Enable extended format decompression. Defaults to ``TAMP_EXTENDED`` but can | +| | | be individually overridden for compressor-only or decompressor-only builds. | ++----------------------------------+-------------------+------------------------------------------------------------------------------+ +| ``TAMP_LAZY_MATCHING`` | ``0`` | Enable lazy matching support. When enabled, ``TampConf.lazy_matching`` | +| | | becomes available. Improves compression ratio by 0.5-2% at the cost of | +| | | 50-75% slower compression. Most embedded systems should leave disabled. | ++----------------------------------+-------------------+------------------------------------------------------------------------------+ +| ``TAMP_STREAM`` | ``1`` | Include stream API (``tamp_compress_stream``, ``tamp_decompress_stream``). | +| | | Disable with ``-DTAMP_STREAM=0`` to save ~2.8KB if only using low-level API. | ++----------------------------------+-------------------+------------------------------------------------------------------------------+ +| ``TAMP_STREAM_WORK_BUFFER_SIZE`` | ``32`` | Stack-allocated work buffer size (bytes) for stream API. Split in half | +| | | for input/output. Larger values reduce I/O callback invocations, | +| | | improving decompression speed. 256+ bytes recommended when stack permits. | ++----------------------------------+-------------------+------------------------------------------------------------------------------+ +| ``TAMP_STREAM_STDIO`` | ``0`` | Enable stdio (``FILE*``) stream handlers. Works with standard C library, | +| | | ESP-IDF VFS, and POSIX-compatible systems. 
| ++----------------------------------+-------------------+------------------------------------------------------------------------------+ +| ``TAMP_STREAM_MEMORY`` | ``0`` | Enable memory buffer stream handlers (``TampMemReader``, ``TampMemWriter``). | +| | | Useful for file-to-memory or memory-to-file operations. | ++----------------------------------+-------------------+------------------------------------------------------------------------------+ +| ``TAMP_STREAM_LITTLEFS`` | ``0`` | Enable LittleFS stream handlers. Requires LittleFS headers. | ++----------------------------------+-------------------+------------------------------------------------------------------------------+ +| ``TAMP_STREAM_FATFS`` | ``0`` | Enable FatFs (ChaN's FAT filesystem) stream handlers. Requires FatFs headers.| ++----------------------------------+-------------------+------------------------------------------------------------------------------+ +| ``TAMP_ESP32`` | ``0`` | Use ESP32-optimized variant. Avoids bitfields for speed at the cost of | +| | | slightly higher memory usage. Automatically enabled via Kconfig on ESP-IDF. | ++----------------------------------+-------------------+------------------------------------------------------------------------------+ + +**Example: Minimal decompressor-only build** + +.. code-block:: bash + + gcc -DTAMP_EXTENDED_COMPRESS=0 -DTAMP_STREAM=0 -c decompressor.c common.c + +**Example: Full-featured build with LittleFS support** + +.. code-block:: bash + + gcc -DTAMP_LAZY_MATCHING=1 -DTAMP_STREAM_LITTLEFS=1 -DTAMP_STREAM_WORK_BUFFER_SIZE=256 \ + -c compressor.c decompressor.c common.c + Overview ^^^^^^^^ To use Tamp in your C project, simply copy the contents of ``tamp/_c_src`` into your project. diff --git a/tamp/_c_src/tamp/common.h b/tamp/_c_src/tamp/common.h index 48acdd7f..c33a8cb4 100644 --- a/tamp/_c_src/tamp/common.h +++ b/tamp/_c_src/tamp/common.h @@ -70,13 +70,19 @@ extern "C" { /* Extended format support (RLE, extended match). 
* Enabled by default. Disable to save code size on minimal builds. - * Separate flags allow decompressor-only or compressor-only extended support. + * + * TAMP_EXTENDED is the master switch (default: 1). + * TAMP_EXTENDED_COMPRESS and TAMP_EXTENDED_DECOMPRESS default to TAMP_EXTENDED, + * but can be individually overridden for compressor-only or decompressor-only builds. */ +#ifndef TAMP_EXTENDED +#define TAMP_EXTENDED 1 +#endif #ifndef TAMP_EXTENDED_DECOMPRESS -#define TAMP_EXTENDED_DECOMPRESS 1 +#define TAMP_EXTENDED_DECOMPRESS TAMP_EXTENDED #endif #ifndef TAMP_EXTENDED_COMPRESS -#define TAMP_EXTENDED_COMPRESS 1 +#define TAMP_EXTENDED_COMPRESS TAMP_EXTENDED #endif /* Extended encoding constants */ From f1a06db7ae83eab2b9d891c2357c0246fd49dda1 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Tue, 3 Feb 2026 11:29:36 -0500 Subject: [PATCH 053/109] Mention extended field in javascript docs. --- docs/source/javascript.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/source/javascript.rst b/docs/source/javascript.rst index 39bbe215..6972befe 100644 --- a/docs/source/javascript.rst +++ b/docs/source/javascript.rst @@ -51,6 +51,12 @@ Customize compression behavior with options: // For general use, 8 (the whole byte) is appropriate. literal: 7, + // Enable extended format (RLE, extended match) for better compression ratios. + // The extended format provides better compression for typical data at the + // cost of slightly more complex encoding. + // Default: true + extended: true, + // Enable lazy matching to slightly improve compression (0.5-2.0%) ratios // at the cost of 50-75% slower compression. // Most embedded systems will **not** want to use this feature and disable it. @@ -136,6 +142,7 @@ Configure compression parameters by passing in options: const options = { window: 12, // Larger window for (usually) better compression literal: 7, // ASCII text only requires 7 bits. 
+ extended: true, // Enable extended format (RLE, extended match) lazy_matching: true // Better compression ratios; slower to compress }; From 8fb80424f305fb726c46412763ea04e384d8e4df Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Tue, 3 Feb 2026 12:08:24 -0500 Subject: [PATCH 054/109] update README, add ablation study. --- README.md | 70 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 73fdf5be..811b73af 100644 --- a/README.md +++ b/README.md @@ -258,31 +258,30 @@ input data sourced from the [Enwik8](https://mattmahoney.net/dc/textdata.html). This should give a general idea of how these algorithms perform over a variety of input data types. -| dataset | raw | tamp | tamp (LazyMatching) | zlib | heatshrink | -| --------------- | ----------- | -------------- | ------------------- | -------------- | ---------- | -| enwik8 | 100,000,000 | **51,635,633** | 51,252,113 | 56,205,166 | 56,110,394 | -| silesia/dickens | 10,192,446 | **5,546,761** | 5,511,604 | 6,049,169 | 6,155,768 | -| silesia/mozilla | 51,220,480 | 25,121,385 | 24,936,067 | **25,104,966** | 25,435,908 | -| silesia/mr | 9,970,564 | 5,027,032 | 4,886,272 | **4,864,734** | 5,442,180 | -| silesia/nci | 33,553,445 | 8,643,610 | 8,645,299 | **5,765,521** | 8,247,487 | -| silesia/ooffice | 6,152,192 | **3,814,938** | 3,798,261 | 4,077,277 | 3,994,589 | -| silesia/osdb | 10,085,684 | **8,520,835** | 8,506,443 | 8,625,159 | 8,747,527 | -| silesia/reymont | 6,627,202 | **2,847,981** | 2,820,870 | 2,897,661 | 2,910,251 | -| silesia/samba | 21,606,400 | 9,102,594 | 9,060,692 | **8,862,423** | 9,223,827 | -| silesia/sao | 7,251,944 | **6,137,755** | 6,101,744 | 6,506,417 | 6,400,926 | -| silesia/webster | 41,458,703 | **18,694,172** | 18,567,288 | 20,212,235 | 19,942,817 | -| silesia/x-ray | 8,474,240 | 7,510,606 | 7,405,814 | **7,351,750** | 8,059,723 | -| silesia/xml | 5,345,280 | 1,681,687 | 1,672,660 | **1,586,985** 
| 1,665,179 | - -Tamp usually out-performs heatshrink, and is generally very competitive with -zlib. While trying to be an apples-to-apples comparison, zlib still uses -significantly more memory during both compression and decompression (see next -section). Tamp accomplishes competitive performance while using around 10x less -memory. +| dataset | raw | tamp | tamp (LazyMatching) | zlib | heatshrink | +| --------------- | ----------- | ----------- | ------------------- | ------------- | ---------- | +| enwik8 | 100,000,000 | 51,116,968 | **50,725,098** | 56,205,166 | 56,110,394 | +| RPI_PICO (.uf2) | 667,648 | **288,704** | 289,735 | 303,763 | - | +| silesia/dickens | 10,192,446 | 5,538,712 | **5,503,021** | 6,049,169 | 6,155,768 | +| silesia/mozilla | 51,220,480 | 24,499,954 | **24,311,290** | 25,104,966 | 25,435,908 | +| silesia/mr | 9,970,564 | 4,524,424 | **4,396,124** | 4,864,734 | 5,442,180 | +| silesia/nci | 33,553,445 | 7,093,354 | 7,003,632 | **5,765,521** | 8,247,487 | +| silesia/ooffice | 6,152,192 | 3,779,238 | **3,763,795** | 4,077,277 | 3,994,589 | +| silesia/osdb | 10,085,684 | 8,467,407 | **8,452,497** | 8,625,159 | 8,747,527 | +| silesia/reymont | 6,627,202 | 2,825,458 | **2,793,473** | 2,897,661 | 2,910,251 | +| silesia/samba | 21,606,400 | 8,443,932 | **8,395,048** | 8,862,423 | 9,223,827 | +| silesia/sao | 7,251,944 | 6,136,102 | **6,100,071** | 6,506,417 | 6,400,926 | +| silesia/webster | 41,458,703 | 18,259,149 | **18,118,788** | 20,212,235 | 19,942,817 | +| silesia/x-ray | 8,474,240 | 7,509,652 | 7,404,794 | **7,351,750** | 8,059,723 | +| silesia/xml | 5,345,280 | 1,493,131 | **1,473,832** | 1,586,985 | 1,665,179 | + +Tamp outperforms both heatshrink and zlib on most datasets, winning 12 out of 14 +benchmarks. This is while using around 10x less memory than zlib during both +compression and decompression (see next section). 
Lazy Matching is a simple technique to improve compression ratios at the expense of CPU while requiring very little code. One can expect **50-75%** more CPU -usage for modest compression gains (around 0.5 - 2.0%). Because of this poor +usage for modest compression gains (around 0.5 - 2.0%). Because of this trade-off, it is disabled by default; however, in applications where we want to compress once on a powerful machine (like a desktop/server) and decompress on an embedded device, it may be worth it to spend a bit more compute. Lazy matched @@ -305,6 +304,33 @@ repeating data more efficiently. Given Tamp's excellent performance in most of the other data compression benchmark files, this is a good tradeoff for most real-world scenarios. +### Ablation Study + +The following table shows the effect of the `extended` and `lazy_matching` +compression parameters across all benchmark datasets (`window=10`, `literal=8`). + +| dataset | raw | Baseline | +lazy | +extended | +lazy +extended | +| --------------- | ----------- | ---------- | ------------------ | ------------------ | ------------------ | +| enwik8 | 100,000,000 | 51,635,633 | 51,252,113 (−0.7%) | 51,116,968 (−1.0%) | 50,725,098 (−1.8%) | +| RPI_PICO (.uf2) | 667,648 | 331,310 | 329,875 (−0.4%) | 288,704 (−12.9%) | 289,735 (−12.5%) | +| silesia/dickens | 10,192,446 | 5,546,761 | 5,511,604 (−0.6%) | 5,538,712 (−0.1%) | 5,503,021 (−0.8%) | +| silesia/mozilla | 51,220,480 | 25,121,385 | 24,936,067 (−0.7%) | 24,499,954 (−2.5%) | 24,311,290 (−3.2%) | +| silesia/mr | 9,970,564 | 5,027,032 | 4,886,272 (−2.8%) | 4,524,424 (−10.0%) | 4,396,124 (−12.6%) | +| silesia/nci | 33,553,445 | 8,643,610 | 8,645,299 (+0.0%) | 7,093,354 (−17.9%) | 7,003,632 (−19.0%) | +| silesia/ooffice | 6,152,192 | 3,814,938 | 3,798,261 (−0.4%) | 3,779,238 (−0.9%) | 3,763,795 (−1.3%) | +| silesia/osdb | 10,085,684 | 8,520,835 | 8,506,443 (−0.2%) | 8,467,407 (−0.6%) | 8,452,497 (−0.8%) | +| silesia/reymont | 6,627,202 | 2,847,981 | 2,820,870 (−1.0%) 
| 2,825,458 (−0.8%) | 2,793,473 (−1.9%) | +| silesia/samba | 21,606,400 | 9,102,594 | 9,060,692 (−0.5%) | 8,443,932 (−7.2%) | 8,395,048 (−7.8%) | +| silesia/sao | 7,251,944 | 6,137,755 | 6,101,744 (−0.6%) | 6,136,102 (−0.0%) | 6,100,071 (−0.6%) | +| silesia/webster | 41,458,703 | 18,694,172 | 18,567,228 (−0.7%) | 18,259,149 (−2.3%) | 18,118,788 (−3.1%) | +| silesia/x-ray | 8,474,240 | 7,510,606 | 7,405,814 (−1.4%) | 7,509,652 (−0.0%) | 7,404,794 (−1.4%) | +| silesia/xml | 5,345,280 | 1,681,687 | 1,672,660 (−0.5%) | 1,493,131 (−11.2%) | 1,473,832 (−12.4%) | + +The `extended` parameter enables additional Huffman codes for longer pattern +matches, which significantly improves compression on datasets with many long +repeating patterns (e.g., nci, samba, xml). Extended support was added in +v2.0.0. + ## Memory Usage The following table shows approximately how much memory each algorithm uses From 23239dd4f9cf5e89747adce8e219c62dc619e9a1 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Tue, 3 Feb 2026 15:04:26 -0500 Subject: [PATCH 055/109] Address copilot feedback --- .gitattributes | 2 +- docs/source/specification.rst | 5 +++-- tamp/_c_src/tamp/compressor.c | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.gitattributes b/.gitattributes index 530b7adf..a1dce8fa 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,2 @@ datasets/v1-compressed/** filter=lfs diff=lfs merge=lfs -text -datasets/v2-compressed/** filter=lfs diff=lfs merge=lfs -text +datasets/extended-compressed/** filter=lfs diff=lfs merge=lfs -text diff --git a/docs/source/specification.rst b/docs/source/specification.rst index b5fc97a7..af211760 100644 --- a/docs/source/specification.rst +++ b/docs/source/specification.rst @@ -61,8 +61,9 @@ Modifications are made to make the implementation simpler/faster. and points at the offset from the beginning of the dictionary buffer to the pattern. 
The shortest pattern-length is either going to be 2 or 3 bytes, depending on ``window`` and ``literal`` parameters. The shortest pattern-length encoding must be shorter than - an equivalent stream of literals. The longest pattern-length is the minimum - pattern-length plus 13. + an equivalent stream of literals. In the basic (non-extended) format, the longest + pattern-length is the minimum pattern-length plus 13. When the ``extended`` flag + is set, longer matches are possible via extended match encoding. Classically, the ``offset`` is from the current position in the buffer. Doing so results in the ``offset`` distribution slightly favoring smaller numbers. Intuitively, it makes diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index ecfbe4db..ab7ce531 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -475,6 +475,8 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned if (next_match_size > match_size && validate_no_match_overlap(compressor->window_pos, next_match_index, next_match_size)) { // Force literal at current position, cache next match + compressor->cached_match_index = next_match_index; + compressor->cached_match_size = next_match_size; match_size = 0; // Will trigger literal write below } else { compressor->cached_match_index = -1; From 2d1829a4a85902492eaca9e808e0e6b1c95fd429 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Tue, 3 Feb 2026 19:39:07 -0500 Subject: [PATCH 056/109] update specs for v2 --- docs/source/specification.rst | 79 +++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/docs/source/specification.rst b/docs/source/specification.rst index af211760..cbbf5f58 100644 --- a/docs/source/specification.rst +++ b/docs/source/specification.rst @@ -169,6 +169,85 @@ The maximum match-size is more likely than the second-highest match-size because For any given huffman coding schema, a equivalent coding can be obtained by inverting all 
the bits (reflecting the huffman tree). The single-bit, most common code ``0b0`` representing a pattern-size 2 is intentionally represented as ``0b0`` instead of ``0b1``. This makes the MSb of all other codes be 1, simplifying the decoding procedure because the number of bits read doesn't strictly have to be recorded. +Extended Format (v2.0.0+) +^^^^^^^^^^^^^^^^^^^^^^^^^ +When the ``extended`` header bit is set, two additional token types are available: +RLE (Run-Length Encoding) and Extended Match. These use Huffman symbols 12 and 13 +respectively, which in the basic format would represent match sizes ``min_pattern_size + 12`` +and ``min_pattern_size + 13``. + +Extended Huffman Encoding +------------------------- +Both RLE and Extended Match use a secondary Huffman encoding to represent their payload values. +This encoding combines a Huffman code (without the literal flag) with trailing bits: + +1. Read the Huffman symbol (12 for RLE, 13 for Extended Match) with the literal flag (``0b0``). +2. Decode an additional Huffman code (reusing the same table, but without the leading literal flag bit). +3. Read trailing bits (4 bits for RLE, 3 bits for Extended Match). +4. Combine: ``value = (huffman_index << trailing_bits) + trailing_bits_value`` + +RLE Token (Symbol 12) +--------------------- +RLE encodes runs of repeated bytes efficiently. The repeated byte is implicitly +the last byte written to the window buffer. + +Format: ``0b0 | huffman_code[12] | extended_huffman(count - 2, trailing=4)`` + +Where: + +- ``huffman_code[12]`` = ``0xAA`` (9 bits including literal flag) +- ``extended_huffman`` encodes ``count - 2`` with 4 trailing bits +- ``count`` ranges from 2 to 225: ``(13 << 4) + 15 + 2 = 225`` + +Window update: Only the first 8 bytes are written to the dictionary (no wrap-around). +If fewer than 8 bytes remain before the end of the window buffer, only those bytes +are written. 
This bounds the window update cost while still allowing the decompressor +to resolve subsequent pattern matches. + +.. code-block:: text + + RLE Token Structure (cnt = count - 2): + +---+------------+-------------------+----------------+ + | 0 | huffman[12]| huffman(cnt>>4) | cnt & 0xF | + +---+------------+-------------------+----------------+ + |1b | 8 bits | 1-8 bits | 4 bits | + +---+------------+-------------------+----------------+ + +Extended Match Token (Symbol 13) +-------------------------------- +Extended Match allows pattern matches longer than the basic format's maximum of +``min_pattern_size + 13``. It is used when a match exceeds ``min_pattern_size + 11``. + +Format: ``0b0 | huffman_code[13] | extended_huffman(size - min_pattern_size - 12, trailing=3) | offset`` + +Where: + +- ``huffman_code[13]`` = ``0x27`` (7 bits including literal flag) +- ``extended_huffman`` encodes ``size - min_pattern_size - 12`` with 3 trailing bits +- ``offset`` is ``window`` bits, pointing to the start of the pattern +- Maximum extra size: ``(13 << 3) + 7 + 1 = 112`` +- Maximum total match size: ``min_pattern_size + 11 + 112 = min_pattern_size + 123`` + +The ``-12`` offset ensures extended matches start at ``min_pattern_size + 12``, leaving +symbols 0-11 for basic matches (0-11 maps to ``min_pattern_size`` through ``min_pattern_size + 11``). + +Window constraints: The source pattern cannot span past the window buffer boundary; +the compressor terminates extended matches early if they would cross this boundary. +Similarly, destination writes do not wrap around; only bytes up to the end of the +window buffer are written. This simplifies implementation while having minimal +impact on compression ratio (approximately 0.02% loss). + +..
code-block:: text + + Extended Match Token Structure: + +---+------------+-------------------+----------------+--------+ + | 0 | huffman[13]| huffman(sz>>3) | sz & 0x7 | offset | + +---+------------+-------------------+----------------+--------+ + |1b | 6 bits | 1-8 bits | 3 bits | window | + +---+------------+-------------------+----------------+--------+ + + Where sz = match_size - min_pattern_size - 12 + Flush Symbol ------------ A special FLUSH symbol is encoded as the least likely Huffman code. From eb072c9bb1dac2f825661ac05fa84e57771fd6b6 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Tue, 3 Feb 2026 19:43:59 -0500 Subject: [PATCH 057/109] document RLE edgecase --- docs/source/specification.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/source/specification.rst b/docs/source/specification.rst index cbbf5f58..31bb1024 100644 --- a/docs/source/specification.rst +++ b/docs/source/specification.rst @@ -189,7 +189,9 @@ This encoding combines a Huffman code (without the literal flag) with trailing b RLE Token (Symbol 12) --------------------- RLE encodes runs of repeated bytes efficiently. The repeated byte is implicitly -the last byte written to the window buffer. +the last byte written to the window buffer. If no bytes have been written yet +(i.e., ``window_pos == 0``), the byte at position ``window_size - 1`` of the +initial dictionary is used. 
Format: ``0b0 | huffman_code[12] | extended_huffman(count - 2, trailing=4)`` From 050c2d08de110453b645cfe38c0989c75b1e0087 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Tue, 3 Feb 2026 19:51:17 -0500 Subject: [PATCH 058/109] fix(wasm): add cancel handlers to streams to prevent memory leaks --- wasm/src/streams.js | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/wasm/src/streams.js b/wasm/src/streams.js index d3420027..2a6c3509 100644 --- a/wasm/src/streams.js +++ b/wasm/src/streams.js @@ -41,9 +41,17 @@ export class TampCompressionStream extends TransformStream { } finally { if (compressor) { compressor.destroy(); + compressor = null; } } }, + + cancel(_reason) { + if (compressor) { + compressor.destroy(); + compressor = null; + } + }, }); } } @@ -88,9 +96,17 @@ export class TampDecompressionStream extends TransformStream { } finally { if (decompressor) { decompressor.destroy(); + decompressor = null; } } }, + + cancel(_reason) { + if (decompressor) { + decompressor.destroy(); + decompressor = null; + } + }, }); } } From eebc663b2399479e9f390514d8aa9f38e8c82586 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Tue, 3 Feb 2026 19:56:37 -0500 Subject: [PATCH 059/109] update README --- README.md | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 811b73af..636e38d6 100644 --- a/README.md +++ b/README.md @@ -357,7 +357,7 @@ on an M3 Macbook Air. | | Compression (s) | Decompression (s) | | ---------------------------- | --------------- | ----------------- | | Tamp (Pure Python Reference) | 136.2 | 105.0 | -| Tamp (C bindings) | 5.56 | 0.544 | +| Tamp (C bindings) | 5.45 | 0.544 | | ZLib | 3.65 | 0.578 | | Heatshrink (with index) | 4.42 | 0.67 | | Heatshrink (without index) | 27.40 | 0.67 | @@ -377,7 +377,7 @@ speed Tamp can achieve. In all tests, a 1KB window (10 bit) was used. 
| | Compression (bytes/s) | Decompression (bytes/s) | | -------------------------------- | --------------------- | ----------------------- | | Tamp (MicroPython Viper) | 4,300 | 42,000 | -| Tamp (Micropython Native Module) | 31,192 | 1,086,957 | +| Tamp (Micropython Native Module) | 31,949 | 1,086,957 | | Tamp (C) | 36,127 | 1,400,600 | | Deflate (micropython builtin) | 6,885 | 294,985 | @@ -391,19 +391,21 @@ compiled for the Pi Pico (`armv6m`). All libraries were compiled with `-O3`. Numbers reported in bytes. Tamp sizes were measured using `arm-none-eabi-gcc` 15.2.1 and MicroPython v1.27, and can be regenerated with `make binary-size`. -| | Compressor | Decompressor | Compressor + Decompressor | -| ------------------------- | ---------- | ------------ | ------------------------- | -| Tamp (MicroPython Viper) | 4676 | 4372 | 7917 | -| Tamp (MicroPython Native) | 3896 | 3559 | 6616 | -| Tamp (C, -DTAMP_STREAM=0) | 2028 | 1992 | 3900 | -| Tamp (C) | 2472 | 2444 | 4796 | -| Heatshrink (C) | 2956 | 3876 | 6832 | -| uzlib (C) | 2355 | 3963 | 6318 | - -Tamp C includes a high-level stream API by default. Even with `-DTAMP_STREAM=0`, -Tamp includes buffer-looping functions (like `tamp_compressor_compress`) that -Heatshrink lacks (Heatshrink only provides poll/sink primitives). In an -apples-to-apples comparison, Tamp would be even smaller. +| | Compressor | Decompressor | Compressor + Decompressor | +| -------------------------------- | ---------- | ------------ | ------------------------- | +| Tamp (MicroPython Viper) | 4676 | 4372 | 7917 | +| Tamp (MicroPython Native) | 3896 | 3559 | 6616 | +| Tamp (C, no extended, no stream) | 1800 | 1584 | 3264 | +| Tamp (C, no extended) | 2204 | 2036 | 4120 | +| Tamp (C, extended, no stream) | 2884 | 2436 | 5200 | +| Tamp (C, extended) | 3288 | 2888 | 6056 | +| Heatshrink (C) | 2956 | 3876 | 6832 | +| uzlib (C) | 2355 | 3963 | 6318 | + +Tamp C "extended" includes `tamp_compressor_compress_and_flush`. 
Tamp C includes +a high-level stream API by default. Even with `no stream`, Tamp includes +buffer-looping functions (like `tamp_compressor_compress`) that Heatshrink lacks +(Heatshrink only provides poll/sink primitives). ## Acknowledgement From f614a0016e0763eca430aa3ea74a0aa2a379fbbf Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 4 Feb 2026 09:21:31 -0500 Subject: [PATCH 060/109] combine some TAMP_LAZY_MATCHING blocks --- tamp/_c_src/tamp/compressor.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index ab7ce531..d3ce0580 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -201,14 +201,11 @@ tamp_res tamp_compressor_init(TampCompressor *compressor, const TampConf *conf, compressor->conf_window = conf->window; compressor->conf_use_custom_dictionary = conf->use_custom_dictionary; compressor->conf_extended = conf->extended; -#if TAMP_LAZY_MATCHING - compressor->conf_lazy_matching = conf->lazy_matching; -#endif - compressor->window = window; compressor->min_pattern_size = tamp_compute_min_pattern_size(conf->window, conf->literal); #if TAMP_LAZY_MATCHING + compressor->conf_lazy_matching = conf->lazy_matching; compressor->cached_match_index = -1; // Initialize cache as invalid #endif From a8987796d928820d9a947b66e0390a3cf71eb610 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 4 Feb 2026 09:25:40 -0500 Subject: [PATCH 061/109] more macro conditional blocks combined --- tamp/_c_src/tamp/compressor.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index d3ce0580..e63fd26f 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -338,6 +338,9 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned if (TAMP_UNLIKELY(output_size == 0)) return TAMP_OUTPUT_FULL; + uint8_t match_size = 0; + uint16_t 
match_index = 0; + #if TAMP_EXTENDED_COMPRESS // Extended: Handle extended match continuation if (TAMP_UNLIKELY(compressor->conf_extended && compressor->extended_match_count)) { @@ -392,12 +395,7 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned // Ran out of input while extending - return and wait for more return TAMP_OK; } -#endif // TAMP_EXTENDED_COMPRESS - - uint8_t match_size = 0; - uint16_t match_index = 0; -#if TAMP_EXTENDED_COMPRESS // Extended: Handle RLE accumulation with persistent state // For simplicity in C, we commit RLE immediately when the run ends if (TAMP_UNLIKELY(compressor->conf_extended)) { @@ -444,15 +442,7 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned } else { find_best_match(compressor, &match_index, &match_size); } - } else { - find_best_match(compressor, &match_index, &match_size); - } -#else - find_best_match(compressor, &match_index, &match_size); -#endif -#if TAMP_LAZY_MATCHING - if (compressor->conf_lazy_matching) { // Lazy matching: if we have a good match, check if position i+1 has a better match if (match_size >= compressor->min_pattern_size && match_size <= 8 && compressor->input_size > match_size + 2) { // Temporarily advance input position to check next position @@ -483,7 +473,11 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned } else { compressor->cached_match_index = -1; // Clear cache } + } else { + find_best_match(compressor, &match_index, &match_size); } +#else + find_best_match(compressor, &match_index, &match_size); #endif // Shared token/literal writing logic From 34f48be0fc82495d75e5c27670aa62241dcaddb3 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 4 Feb 2026 09:38:23 -0500 Subject: [PATCH 062/109] combine some write_to_bit_buffer calls, reducing binary by 20~36 bytes --- README.md | 8 ++++---- tamp/_c_src/tamp/compressor.c | 24 +++++++++++------------- 2 files changed, 15 insertions(+), 17 deletions(-) diff 
--git a/README.md b/README.md index 636e38d6..ac4306b5 100644 --- a/README.md +++ b/README.md @@ -395,10 +395,10 @@ Numbers reported in bytes. Tamp sizes were measured using `arm-none-eabi-gcc` | -------------------------------- | ---------- | ------------ | ------------------------- | | Tamp (MicroPython Viper) | 4676 | 4372 | 7917 | | Tamp (MicroPython Native) | 3896 | 3559 | 6616 | -| Tamp (C, no extended, no stream) | 1800 | 1584 | 3264 | -| Tamp (C, no extended) | 2204 | 2036 | 4120 | -| Tamp (C, extended, no stream) | 2884 | 2436 | 5200 | -| Tamp (C, extended) | 3288 | 2888 | 6056 | +| Tamp (C, no extended, no stream) | 1764 | 1584 | 3228 | +| Tamp (C, no extended) | 2168 | 2036 | 4084 | +| Tamp (C, extended, no stream) | 2864 | 2436 | 5180 | +| Tamp (C, extended) | 3268 | 2888 | 6036 | | Heatshrink (C) | 2956 | 3876 | 6832 | | uzlib (C) | 2355 | 3963 | 6318 | diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index e63fd26f..685511ad 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -59,11 +59,10 @@ static inline void write_to_bit_buffer(TampCompressor *compressor, uint32_t bits * @param[in] trailing_bits Number of trailing bits (3 for extended match, 4 for RLE). 
*/ static TAMP_NOINLINE void write_extended_huffman(TampCompressor *compressor, uint8_t value, uint8_t trailing_bits) { - uint8_t mask = (1 << trailing_bits) - 1; uint8_t code_index = value >> trailing_bits; - // Write huffman code without literal flag (subtract 1 from bit length) - write_to_bit_buffer(compressor, huffman_codes[code_index], huffman_bits[code_index] - 1); - write_to_bit_buffer(compressor, value & mask, trailing_bits); + // Write huffman code (without literal flag) + trailing bits in one call + write_to_bit_buffer(compressor, (huffman_codes[code_index] << trailing_bits) | (value & ((1 << trailing_bits) - 1)), + (huffman_bits[code_index] - 1) + trailing_bits); } #endif // TAMP_EXTENDED_COMPRESS @@ -211,12 +210,11 @@ tamp_res tamp_compressor_init(TampCompressor *compressor, const TampConf *conf, if (!compressor->conf_use_custom_dictionary) tamp_initialize_dictionary(window, (1 << conf->window)); - // Write header to bit buffer - write_to_bit_buffer(compressor, compressor->conf_window - 8, 3); - write_to_bit_buffer(compressor, compressor->conf_literal - 5, 2); - write_to_bit_buffer(compressor, compressor->conf_use_custom_dictionary, 1); - write_to_bit_buffer(compressor, compressor->conf_extended, 1); - write_to_bit_buffer(compressor, 0, 1); // No more header bytes + // Write header to bit buffer (8 bits total) + // Layout: [window:3][literal:2][use_custom_dictionary:1][extended:1][more_headers:1] + uint8_t header = ((compressor->conf_window - 8) << 5) | ((compressor->conf_literal - 5) << 3) | + (compressor->conf_use_custom_dictionary << 2) | (compressor->conf_extended << 1); + write_to_bit_buffer(compressor, header, 8); return TAMP_OK; } @@ -501,10 +499,10 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned return TAMP_OK; } #endif // TAMP_EXTENDED_COMPRESS - // Write TOKEN + // Write TOKEN (huffman code + window position) uint8_t huffman_index = match_size - compressor->min_pattern_size; - write_to_bit_buffer(compressor, 
huffman_codes[huffman_index], huffman_bits[huffman_index]); - write_to_bit_buffer(compressor, match_index, compressor->conf_window); + write_to_bit_buffer(compressor, (huffman_codes[huffman_index] << compressor->conf_window) | match_index, + huffman_bits[huffman_index] + compressor->conf_window); } // Populate Window for (uint8_t i = 0; i < match_size; i++) { From d8828bc3e4b63fae6bc1c4ae790e62df43db90c6 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 4 Feb 2026 09:40:39 -0500 Subject: [PATCH 063/109] replace hard-coded values with their math formulas --- tamp/_c_src/tamp/compressor.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 685511ad..d0ff261a 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -30,11 +30,8 @@ static const uint8_t huffman_codes[] = {0x0, 0x3, 0x8, 0xb, 0x14, 0x24, 0x26, 0x static const uint8_t huffman_bits[] = {0x2, 0x3, 0x5, 0x5, 0x6, 0x7, 0x7, 0x7, 0x8, 0x8, 0x9, 0x9, 0x9, 0x7}; #if TAMP_EXTENDED_COMPRESS -// Extended: Maximum RLE count = (13 << 4) + 15 + 2 = 225 -#define RLE_MAX_COUNT 225 -// Extended: Maximum extended match extra = (13 << 3) + 7 + 1 = 112 -// Total max match = min_pattern_size + 11 + 112 = min_pattern_size + 123 -#define EXTENDED_MATCH_MAX_EXTRA 112 +#define RLE_MAX_COUNT ((13 << 4) + 15 + 2) // 225 +#define EXTENDED_MATCH_MAX_EXTRA ((13 << 3) + 7 + 1) // 112 // Minimum output buffer space required for extended match token. // Extended match: symbol (7 bits) + extended huffman (11 bits) + window pos (15 bits) = 33 bits. From 578fcea7e92d9939a586c9fc70cdfb7dcac24bdd Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 4 Feb 2026 09:43:54 -0500 Subject: [PATCH 064/109] build header from conf rather than compressor values. 
Slightly more efficient --- README.md | 8 ++++---- tamp/_c_src/tamp/compressor.c | 11 ++++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index ac4306b5..65485a9b 100644 --- a/README.md +++ b/README.md @@ -395,10 +395,10 @@ Numbers reported in bytes. Tamp sizes were measured using `arm-none-eabi-gcc` | -------------------------------- | ---------- | ------------ | ------------------------- | | Tamp (MicroPython Viper) | 4676 | 4372 | 7917 | | Tamp (MicroPython Native) | 3896 | 3559 | 6616 | -| Tamp (C, no extended, no stream) | 1764 | 1584 | 3228 | -| Tamp (C, no extended) | 2168 | 2036 | 4084 | -| Tamp (C, extended, no stream) | 2864 | 2436 | 5180 | -| Tamp (C, extended) | 3268 | 2888 | 6036 | +| Tamp (C, no extended, no stream) | 1756 | 1584 | 3220 | +| Tamp (C, no extended) | 2160 | 2036 | 4076 | +| Tamp (C, extended, no stream) | 2856 | 2436 | 5172 | +| Tamp (C, extended) | 3260 | 2888 | 6028 | | Heatshrink (C) | 2956 | 3876 | 6832 | | uzlib (C) | 2355 | 3963 | 6318 | diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index d0ff261a..686bc019 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -193,6 +193,11 @@ tamp_res tamp_compressor_init(TampCompressor *compressor, const TampConf *conf, for (uint8_t i = 0; i < sizeof(TampCompressor); i++) // Zero-out the struct ((unsigned char *)compressor)[i] = 0; + // Build header directly from conf (8 bits total) + // Layout: [window:3][literal:2][use_custom_dictionary:1][extended:1][more_headers:1] + uint8_t header = ((conf->window - 8) << 5) | ((conf->literal - 5) << 3) | (conf->use_custom_dictionary << 2) | + (conf->extended << 1); + compressor->conf_literal = conf->literal; compressor->conf_window = conf->window; compressor->conf_use_custom_dictionary = conf->use_custom_dictionary; @@ -205,12 +210,8 @@ tamp_res tamp_compressor_init(TampCompressor *compressor, const TampConf *conf, compressor->cached_match_index = -1; // 
Initialize cache as invalid #endif - if (!compressor->conf_use_custom_dictionary) tamp_initialize_dictionary(window, (1 << conf->window)); + if (!conf->use_custom_dictionary) tamp_initialize_dictionary(window, (1 << conf->window)); - // Write header to bit buffer (8 bits total) - // Layout: [window:3][literal:2][use_custom_dictionary:1][extended:1][more_headers:1] - uint8_t header = ((compressor->conf_window - 8) << 5) | ((compressor->conf_literal - 5) << 3) | - (compressor->conf_use_custom_dictionary << 2) | (compressor->conf_extended << 1); write_to_bit_buffer(compressor, header, 8); return TAMP_OK; From b6cdf4f00b968ab6e915b2bfc8c8f922f6079d39 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 4 Feb 2026 10:34:19 -0500 Subject: [PATCH 065/109] shrink binary more by embedding conf into TampCompressor instead of repacking. --- README.md | 8 ++--- espidf/tamp/compressor_esp32.cpp | 2 +- tamp/_c_src/tamp/compressor.c | 46 +++++++++++++-------------- tamp/_c_src/tamp/compressor.h | 54 ++++++++++---------------------- 4 files changed, 43 insertions(+), 67 deletions(-) diff --git a/README.md b/README.md index 65485a9b..f7c21060 100644 --- a/README.md +++ b/README.md @@ -395,10 +395,10 @@ Numbers reported in bytes. 
Tamp sizes were measured using `arm-none-eabi-gcc` | -------------------------------- | ---------- | ------------ | ------------------------- | | Tamp (MicroPython Viper) | 4676 | 4372 | 7917 | | Tamp (MicroPython Native) | 3896 | 3559 | 6616 | -| Tamp (C, no extended, no stream) | 1756 | 1584 | 3220 | -| Tamp (C, no extended) | 2160 | 2036 | 4076 | -| Tamp (C, extended, no stream) | 2856 | 2436 | 5172 | -| Tamp (C, extended) | 3260 | 2888 | 6028 | +| Tamp (C, no extended, no stream) | 1648 | 1584 | 3112 | +| Tamp (C, no extended) | 2052 | 2036 | 3968 | +| Tamp (C, extended, no stream) | 2796 | 2436 | 5112 | +| Tamp (C, extended) | 3200 | 2888 | 5968 | | Heatshrink (C) | 2956 | 3876 | 6832 | | uzlib (C) | 2355 | 3963 | 6318 | diff --git a/espidf/tamp/compressor_esp32.cpp b/espidf/tamp/compressor_esp32.cpp index afa0c9be..a17570cd 100644 --- a/espidf/tamp/compressor_esp32.cpp +++ b/espidf/tamp/compressor_esp32.cpp @@ -27,7 +27,7 @@ typedef uint32_t u16; #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #define MAX_PATTERN_SIZE (compressor->min_pattern_size + 13) -#define WINDOW_SIZE (1 << compressor->conf_window) +#define WINDOW_SIZE (1 << compressor->conf.window) static inline void write_to_bit_buffer(TampCompressor *compressor, uint32_t bits, u8 n_bits) { compressor->bit_buffer_pos += n_bits; diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 686bc019..991267b7 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -12,15 +12,15 @@ #if TAMP_EXTENDED_COMPRESS // Extended max pattern: min_pattern_size + 11 + 112 = min_pattern_size + 123 #define MAX_PATTERN_SIZE_EXTENDED (compressor->min_pattern_size + 123) -#define MAX_PATTERN_SIZE (compressor->conf_extended ? MAX_PATTERN_SIZE_EXTENDED : (compressor->min_pattern_size + 13)) +#define MAX_PATTERN_SIZE (compressor->conf.extended ? 
MAX_PATTERN_SIZE_EXTENDED : (compressor->min_pattern_size + 13)) #else #define MAX_PATTERN_SIZE (compressor->min_pattern_size + 13) #endif -#define WINDOW_SIZE (1 << compressor->conf_window) +#define WINDOW_SIZE (1 << compressor->conf.window) // 0xF because sizeof(TampCompressor.input) == 16; #define input_add(offset) ((compressor->input_pos + offset) & 0xF) #define read_input(offset) (compressor->input[input_add(offset)]) -#define IS_LITERAL_FLAG (1 << compressor->conf_literal) +#define IS_LITERAL_FLAG (1 << compressor->conf.literal) #define FLUSH_CODE (0xAB) @@ -198,15 +198,11 @@ tamp_res tamp_compressor_init(TampCompressor *compressor, const TampConf *conf, uint8_t header = ((conf->window - 8) << 5) | ((conf->literal - 5) << 3) | (conf->use_custom_dictionary << 2) | (conf->extended << 1); - compressor->conf_literal = conf->literal; - compressor->conf_window = conf->window; - compressor->conf_use_custom_dictionary = conf->use_custom_dictionary; - compressor->conf_extended = conf->extended; + compressor->conf = *conf; // Single struct copy compressor->window = window; compressor->min_pattern_size = tamp_compute_min_pattern_size(conf->window, conf->literal); #if TAMP_LAZY_MATCHING - compressor->conf_lazy_matching = conf->lazy_matching; compressor->cached_match_index = -1; // Initialize cache as invalid #endif @@ -222,7 +218,7 @@ tamp_res tamp_compressor_init(TampCompressor *compressor, const TampConf *conf, * @brief Get the last byte written to the window. */ static inline uint8_t get_last_window_byte(TampCompressor *compressor) { - uint16_t prev_pos = (compressor->window_pos - 1) & ((1 << compressor->conf_window) - 1); + uint16_t prev_pos = (compressor->window_pos - 1) & ((1 << compressor->conf.window) - 1); return compressor->window[prev_pos]; } @@ -233,7 +229,7 @@ static inline uint8_t get_last_window_byte(TampCompressor *compressor) { * @param[in] count Number of repeated bytes (must be >= 2). 
*/ static TAMP_NOINLINE void write_rle_token(TampCompressor *compressor, uint8_t count) { - const uint16_t window_mask = (1 << compressor->conf_window) - 1; + const uint16_t window_mask = (1 << compressor->conf.window) - 1; uint8_t symbol = get_last_window_byte(compressor); // Write RLE symbol (12) with literal flag @@ -265,7 +261,7 @@ static TAMP_NOINLINE void write_rle_token(TampCompressor *compressor, uint8_t co */ static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor *compressor, unsigned char *output, size_t output_size, size_t *output_written_size) { - const uint16_t window_mask = (1 << compressor->conf_window) - 1; + const uint16_t window_mask = (1 << compressor->conf.window) - 1; const uint8_t count = compressor->extended_match_count; const uint16_t position = compressor->extended_match_position; tamp_res res; @@ -287,7 +283,7 @@ static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor *compres if (TAMP_UNLIKELY(res != TAMP_OK)) return res; // Write window position - with ≤7 bits remaining, up to 22 bits total - fits - write_to_bit_buffer(compressor, position, compressor->conf_window); + write_to_bit_buffer(compressor, position, compressor->conf.window); // Final flush res = partial_flush(compressor, output, output_size, &flush_bytes); @@ -314,7 +310,7 @@ static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor *compres TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned char *output, size_t output_size, size_t *output_written_size) { tamp_res res; - const uint16_t window_mask = (1 << compressor->conf_window) - 1; + const uint16_t window_mask = (1 << compressor->conf.window) - 1; size_t output_written_size_proxy; if (!output_written_size) output_written_size = &output_written_size_proxy; @@ -339,7 +335,7 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned #if TAMP_EXTENDED_COMPRESS // Extended: Handle extended match continuation - if 
(TAMP_UNLIKELY(compressor->conf_extended && compressor->extended_match_count)) { + if (TAMP_UNLIKELY(compressor->conf.extended && compressor->extended_match_count)) { // We're in extended match mode - try to extend the match at the current position const uint8_t max_ext_match = compressor->min_pattern_size + 11 + EXTENDED_MATCH_MAX_EXTRA; const unsigned char *window = compressor->window; @@ -394,7 +390,7 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned // Extended: Handle RLE accumulation with persistent state // For simplicity in C, we commit RLE immediately when the run ends - if (TAMP_UNLIKELY(compressor->conf_extended)) { + if (TAMP_UNLIKELY(compressor->conf.extended)) { uint8_t last_byte = get_last_window_byte(compressor); // Count and CONSUME matching bytes @@ -429,7 +425,7 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned #endif // TAMP_EXTENDED_COMPRESS #if TAMP_LAZY_MATCHING - if (compressor->conf_lazy_matching) { + if (compressor->conf.lazy_matching) { // Check if we have a cached match from lazy matching if (TAMP_UNLIKELY(compressor->cached_match_index >= 0)) { match_index = compressor->cached_match_index; @@ -481,14 +477,14 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned // Write LITERAL match_size = 1; unsigned char c = read_input(0); - if (TAMP_UNLIKELY(c >> compressor->conf_literal)) { + if (TAMP_UNLIKELY(c >> compressor->conf.literal)) { return TAMP_EXCESS_BITS; } - write_to_bit_buffer(compressor, IS_LITERAL_FLAG | c, compressor->conf_literal + 1); + write_to_bit_buffer(compressor, IS_LITERAL_FLAG | c, compressor->conf.literal + 1); } else { #if TAMP_EXTENDED_COMPRESS // Extended: Check for extended match - if (compressor->conf_extended && match_size > compressor->min_pattern_size + 11) { + if (compressor->conf.extended && match_size > compressor->min_pattern_size + 11) { compressor->extended_match_count = match_size; 
compressor->extended_match_position = match_index; // Consume matched bytes from input @@ -499,8 +495,8 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned #endif // TAMP_EXTENDED_COMPRESS // Write TOKEN (huffman code + window position) uint8_t huffman_index = match_size - compressor->min_pattern_size; - write_to_bit_buffer(compressor, (huffman_codes[huffman_index] << compressor->conf_window) | match_index, - huffman_bits[huffman_index] + compressor->conf_window); + write_to_bit_buffer(compressor, (huffman_codes[huffman_index] << compressor->conf.window) | match_index, + huffman_bits[huffman_index] + compressor->conf.window); } // Populate Window for (uint8_t i = 0; i < match_size; i++) { @@ -590,7 +586,7 @@ tamp_res tamp_compressor_flush(TampCompressor *compressor, unsigned char *output #if TAMP_EXTENDED_COMPRESS // Extended: Flush any pending RLE - if (compressor->conf_extended && compressor->rle_count >= 1) { + if (compressor->conf.extended && compressor->rle_count >= 1) { // Partial flush first to make room res = partial_flush(compressor, output, output_size, &chunk_output_written_size); (*output_written_size) += chunk_output_written_size; @@ -601,10 +597,10 @@ tamp_res tamp_compressor_flush(TampCompressor *compressor, unsigned char *output if (compressor->rle_count == 1) { // Single byte - write as literal (can't use RLE token for count < 2) uint8_t literal = get_last_window_byte(compressor); - write_to_bit_buffer(compressor, IS_LITERAL_FLAG | literal, compressor->conf_literal + 1); + write_to_bit_buffer(compressor, IS_LITERAL_FLAG | literal, compressor->conf.literal + 1); // Write to window - const uint16_t window_mask = (1 << compressor->conf_window) - 1; + const uint16_t window_mask = (1 << compressor->conf.window) - 1; compressor->window[compressor->window_pos] = literal; compressor->window_pos = (compressor->window_pos + 1) & window_mask; } else { @@ -622,7 +618,7 @@ tamp_res tamp_compressor_flush(TampCompressor 
*compressor, unsigned char *output } // Extended: Flush any pending extended match - if (compressor->conf_extended && compressor->extended_match_count) { + if (compressor->conf.extended && compressor->extended_match_count) { // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; res = write_extended_match_token(compressor, output, output_size, &chunk_output_written_size); diff --git a/tamp/_c_src/tamp/compressor.h b/tamp/_c_src/tamp/compressor.h index ff2f1663..577eac58 100644 --- a/tamp/_c_src/tamp/compressor.h +++ b/tamp/_c_src/tamp/compressor.h @@ -22,26 +22,9 @@ typedef struct TampCompressor { unsigned char input[16]; // Input ring buffer /* WARM: read frequently, often cached in locals */ - uint8_t min_pattern_size; // Minimum pattern size (2 bits used; 2 or 3) - uint8_t conf_window; // Window bits (4 bits used; 8-15) - uint8_t conf_literal; // Literal bits (4 bits used; 5-8) - uint8_t conf_use_custom_dictionary; // Custom dictionary (1 bit used; init only) - uint8_t conf_extended; // Extended format enabled (1 bit used) -#if TAMP_LAZY_MATCHING - uint8_t conf_lazy_matching; // Lazy matching enabled (1 bit used) - int16_t cached_match_index; // Lazy matching cache - uint8_t cached_match_size; -#endif - -#if TAMP_EXTENDED_COMPRESS - /* Extended state (only needed when extended compression is enabled) */ - uint8_t rle_count; // Current RLE run length (max 225) - uint8_t extended_match_count; // Current extended match size (max ~126) - uint16_t extended_match_position; // Window position for extended match -#endif - -#else // Use bitfields for reduced memory-usage - + uint8_t min_pattern_size; // Minimum pattern size (2 bits used; 2 or 3) + TampConf conf; +#else // Use bitfields for reduced memory-usage /* HOT: accessed every iteration of the compression loop */ unsigned char *window; // Pointer to window buffer uint32_t bit_buffer; // Bit 
buffer for output (32 bits) @@ -51,28 +34,25 @@ typedef struct TampCompressor { uint8_t input_pos; // Current position in input buffer (4 bits used; 0-15) unsigned char input[16]; // Input ring buffer - /* WARM: read frequently, often cached in locals. - * Bitfields: min_pattern_size(2) + conf_window(4) + conf_literal(4) + - * conf_use_custom_dictionary(1) + conf_extended(1) + conf_lazy_matching(1) = 13 bits - */ - uint8_t min_pattern_size : 2; // Minimum pattern size (2 or 3) - uint8_t conf_window : 4; // Window bits (8-15) - uint8_t conf_literal : 4; // Literal bits (5-8) - uint8_t conf_use_custom_dictionary : 1; // Custom dictionary (init only) - uint8_t conf_extended : 1; // Extended format enabled + /* WARM: read frequently, often cached in locals */ + uint8_t min_pattern_size; // Minimum pattern size (2 or 3) + TampConf conf; +#endif // TAMP_ESP32 + + /* Fields interleaved to avoid internal padding when both LAZY_MATCHING and EXTENDED_COMPRESS enabled */ #if TAMP_LAZY_MATCHING - uint8_t conf_lazy_matching : 1; // Lazy matching enabled - int16_t cached_match_index; // Lazy matching cache - uint8_t cached_match_size; + int16_t cached_match_index; // Lazy matching cache #endif - #if TAMP_EXTENDED_COMPRESS - uint8_t rle_count; // Current RLE run length (max 225) - uint8_t extended_match_count; // Current extended match size (max ~126) uint16_t extended_match_position; // Window position for extended match #endif - -#endif // TAMP_ESP32 +#if TAMP_LAZY_MATCHING + uint8_t cached_match_size; +#endif +#if TAMP_EXTENDED_COMPRESS + uint8_t rle_count; // Current RLE run length (max 225) + uint8_t extended_match_count; // Current extended match size (max ~126) +#endif } TampCompressor; /** From bbbdfe4b9d325f4720e811282e077b7c899293e4 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 4 Feb 2026 10:47:30 -0500 Subject: [PATCH 066/109] Remove micropython viper references from the README --- README.md | 49 +++---------------------------------------------- 1 file changed, 
3 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index f7c21060..78268418 100644 --- a/README.md +++ b/README.md @@ -34,11 +34,8 @@ of RAM and firmware storage. - `pip install tamp` will use a python-bound C implementation optimized for speed. - Micropython: - - Native Module (suggested micropython implementation). + - Native Module. - `mpy_bindings/` - - Viper. - - `tamp/__init__.py`, `tamp/compressor_viper.py`, - `tamp/decompressor_viper.py` - C library: - `tamp/_c_src/` - Javascript/Typescript via Emscripten WASM. @@ -56,14 +53,12 @@ of RAM and firmware storage. # Installation -Tamp contains 4 implementations: +Tamp contains 3 implementations: 1. A reference desktop CPython implementation that is optimized for readability (and **not** speed). 2. A Micropython Native Module implementation (fast). -3. A Micropython Viper implementation (not recommended, please use Native - Module). -4. A C implementation (with python bindings) for accelerated desktop use and to +3. A C implementation (with python bindings) for accelerated desktop use and to be used in C projects (very fast). This section instructs how to install each implementation. @@ -98,42 +93,6 @@ following to `pyproject.toml`. tamp = "https://github.com/BrianPugh/tamp/releases/download/v1.7.0/tamp-1.7.0-mpy1.23-armv6m.mpy" ``` -### MicroPython Viper - -**NOT RECOMMENDED, PLEASE USE NATIVE MODULE** - -For micropython use, there are 3 main files: - -1. `tamp/__init__.py` - Always required. -2. `tamp/decompressor_viper.py` - Required for on-device decompression. -3. `tamp/compressor_viper.py` - Required for on-device compression. - -For example, if on-device decompression isn't used, then do not include -`decompressor_viper.py`. If manually installing, just copy these files to your -microcontroller's `/lib/tamp` folder. 
- -If using -[mip](https://docs.micropython.org/en/latest/reference/packages.html#installing-packages-with-mip), -tamp can be installed by specifying the appropriate `package-*.json` file. - -```bash -mip install github:brianpugh/tamp # Defaults to package.json: Compressor & Decompressor -mip install github:brianpugh/tamp/package-compressor.json # Compressor only -mip install github:brianpugh/tamp/package-decompressor.json # Decompressor only -``` - -If using [Belay](https://github.com/BrianPugh/belay), tamp can be installed by -adding the following to `pyproject.toml`. - -```toml -[tool.belay.dependencies] -tamp = [ - "https://github.com/BrianPugh/tamp/blob/main/tamp/__init__.py", - "https://github.com/BrianPugh/tamp/blob/main/tamp/compressor_viper.py", - "https://github.com/BrianPugh/tamp/blob/main/tamp/decompressor_viper.py", -] -``` - ## C Copy the `tamp/_c_src/tamp` folder into your project. For more information, see @@ -376,7 +335,6 @@ speed Tamp can achieve. In all tests, a 1KB window (10 bit) was used. | | Compression (bytes/s) | Decompression (bytes/s) | | -------------------------------- | --------------------- | ----------------------- | -| Tamp (MicroPython Viper) | 4,300 | 42,000 | | Tamp (Micropython Native Module) | 31,949 | 1,086,957 | | Tamp (C) | 36,127 | 1,400,600 | | Deflate (micropython builtin) | 6,885 | 294,985 | @@ -393,7 +351,6 @@ Numbers reported in bytes. 
Tamp sizes were measured using `arm-none-eabi-gcc` | | Compressor | Decompressor | Compressor + Decompressor | | -------------------------------- | ---------- | ------------ | ------------------------- | -| Tamp (MicroPython Viper) | 4676 | 4372 | 7917 | | Tamp (MicroPython Native) | 3896 | 3559 | 6616 | | Tamp (C, no extended, no stream) | 1648 | 1584 | 3112 | | Tamp (C, no extended) | 2052 | 2036 | 3968 | From 484131a6bb0fc8343f46192c709b342b32302294 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 4 Feb 2026 10:57:21 -0500 Subject: [PATCH 067/109] don't include the stream API in micropython native module. --- Makefile | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index b9833532..9fd03d22 100644 --- a/Makefile +++ b/Makefile @@ -75,7 +75,7 @@ MOD = tamp # Override -Os with -O2 for better performance (last flag wins) CFLAGS_EXTRA = -O2 -CFLAGS += -Itamp/_c_src -DTAMP_COMPRESSOR=$(TAMP_COMPRESSOR) -DTAMP_DECOMPRESSOR=$(TAMP_DECOMPRESSOR) +CFLAGS += -Itamp/_c_src -DTAMP_COMPRESSOR=$(TAMP_COMPRESSOR) -DTAMP_DECOMPRESSOR=$(TAMP_DECOMPRESSOR) -DTAMP_STREAM=0 # Compiler-specific flags based on target architecture ifeq ($(filter $(ARCH),x86 x64),) # Cross-compiling for embedded (ARM, xtensa) - use GCC flags diff --git a/README.md b/README.md index 78268418..f191aa95 100644 --- a/README.md +++ b/README.md @@ -351,7 +351,7 @@ Numbers reported in bytes. 
Tamp sizes were measured using `arm-none-eabi-gcc` | | Compressor | Decompressor | Compressor + Decompressor | | -------------------------------- | ---------- | ------------ | ------------------------- | -| Tamp (MicroPython Native) | 3896 | 3559 | 6616 | +| Tamp (MicroPython Native) | 4736 | 4339 | 8236 | | Tamp (C, no extended, no stream) | 1648 | 1584 | 3112 | | Tamp (C, no extended) | 2052 | 2036 | 3968 | | Tamp (C, extended, no stream) | 2796 | 2436 | 5112 | From 6d3d26f4d59e6cb6ac2f9c2defc4f853f9c8601f Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 4 Feb 2026 11:07:05 -0500 Subject: [PATCH 068/109] Add likely/unlikely hints to micropython bindings --- mpy_bindings/bindings.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mpy_bindings/bindings.c b/mpy_bindings/bindings.c index 125f31c5..0cdf9d0a 100644 --- a/mpy_bindings/bindings.c +++ b/mpy_bindings/bindings.c @@ -14,9 +14,9 @@ _Static_assert(CHUNK_SIZE >= 32, "CHUNK_SIZE must be >= 32 to hold flush output" #define mp_type_bytearray (*(mp_obj_type_t *)(mp_load_global(MP_QSTR_bytearray))) static void TAMP_CHECK(tamp_res res) { - if (res == TAMP_EXCESS_BITS) { + if (TAMP_UNLIKELY(res == TAMP_EXCESS_BITS)) { nlr_raise(mp_obj_new_exception(mp_load_global(MP_QSTR_ExcessBitsError))); - } else if (res < TAMP_OK) { + } else if (TAMP_UNLIKELY(res < TAMP_OK)) { mp_raise_ValueError(""); } } @@ -77,7 +77,7 @@ static mp_obj_t compressor_make_new(const mp_obj_type_t *type, size_t n_args, si mp_buffer_info_t dictionary_buffer_info; mp_get_buffer_raise(o->dictionary, &dictionary_buffer_info, MP_BUFFER_RW); - if (dictionary_buffer_info.len < (1 << conf.window)) { + if (TAMP_UNLIKELY(dictionary_buffer_info.len < (1 << conf.window))) { mp_raise_ValueError(""); } @@ -177,7 +177,7 @@ static mp_obj_t decompressor_make_new(const mp_obj_type_t *type, size_t n_args, const uint16_t window_size = 1 << conf.window; if (o->dictionary == mp_const_none) { - if (conf.use_custom_dictionary) { + if 
(TAMP_UNLIKELY(conf.use_custom_dictionary)) { mp_raise_ValueError(""); } o->dictionary = mp_obj_new_bytearray_by_ref(window_size, m_malloc(window_size)); @@ -186,7 +186,7 @@ static mp_obj_t decompressor_make_new(const mp_obj_type_t *type, size_t n_args, { mp_buffer_info_t dictionary_buffer_info; mp_get_buffer_raise(o->dictionary, &dictionary_buffer_info, MP_BUFFER_RW); - if (dictionary_buffer_info.len < window_size) { + if (TAMP_UNLIKELY(dictionary_buffer_info.len < window_size)) { mp_raise_ValueError(""); } From a5d7691a2af82849d2425504041670204b4b604b Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 4 Feb 2026 13:04:58 -0500 Subject: [PATCH 069/109] don't inline write_to_bitbuffer to save around 60 bytes. --- README.md | 10 +++++----- tamp/_c_src/tamp/compressor.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index f191aa95..ef66b64f 100644 --- a/README.md +++ b/README.md @@ -351,11 +351,11 @@ Numbers reported in bytes. Tamp sizes were measured using `arm-none-eabi-gcc` | | Compressor | Decompressor | Compressor + Decompressor | | -------------------------------- | ---------- | ------------ | ------------------------- | -| Tamp (MicroPython Native) | 4736 | 4339 | 8236 | -| Tamp (C, no extended, no stream) | 1648 | 1584 | 3112 | -| Tamp (C, no extended) | 2052 | 2036 | 3968 | -| Tamp (C, extended, no stream) | 2796 | 2436 | 5112 | -| Tamp (C, extended) | 3200 | 2888 | 5968 | +| Tamp (MicroPython Native) | 4708 | 4339 | 8208 | +| Tamp (C, no extended, no stream) | 1596 | 1584 | 3060 | +| Tamp (C, no extended) | 2000 | 2036 | 3916 | +| Tamp (C, extended, no stream) | 2736 | 2436 | 5052 | +| Tamp (C, extended) | 3140 | 2888 | 5908 | | Heatshrink (C) | 2956 | 3876 | 6832 | | uzlib (C) | 2355 | 3963 | 6318 | diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 991267b7..2f25aeba 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -40,7 +40,7 @@ static const 
uint8_t huffman_bits[] = {0x2, 0x3, 0x5, 0x5, 0x6, 0x7, 0x7, 0x7, 0 #define EXTENDED_MATCH_MIN_OUTPUT_BYTES 6 #endif -static inline void write_to_bit_buffer(TampCompressor *compressor, uint32_t bits, uint8_t n_bits) { +static TAMP_NOINLINE void write_to_bit_buffer(TampCompressor *compressor, uint32_t bits, uint8_t n_bits) { compressor->bit_buffer_pos += n_bits; compressor->bit_buffer |= bits << (32 - compressor->bit_buffer_pos); } From d6325dd1980b09de45223665d5b0b00946c4bcaf Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 4 Feb 2026 13:24:29 -0500 Subject: [PATCH 070/109] cache LFS files. --- .github/workflows/tests.yaml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 47a77b95..35c8ca5a 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -110,7 +110,18 @@ jobs: uses: actions/checkout@v4 with: submodules: recursive - lfs: true + lfs: false + + - name: Cache LFS files + uses: actions/cache@v4 + id: lfs-cache + with: + path: .git/lfs + key: lfs-${{ hashFiles('datasets/v1-compressed/**', 'datasets/extended-compressed/**') }} + restore-keys: lfs- + + - name: Pull LFS files + run: git lfs pull - name: Set up python 3.13 (for Poetry) id: setup-python-system From 47184212d2c2c9235dddcccd9fa3ff2dacbe5c29 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 4 Feb 2026 13:32:31 -0500 Subject: [PATCH 071/109] immediately update lfs cache --- .github/workflows/tests.yaml | 49 ++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 35c8ca5a..f7d50656 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -94,8 +94,48 @@ jobs: source .venv/bin/activate SKIP=wasm-eslint,wasm-npm-test,wasm-file-validation,typescript-check,package-json-lint pre-commit run --show-diff-on-failure --color=always --all-files + cache-lfs: + 
name: 'Cache LFS files' + runs-on: ubuntu-latest + outputs: + cache-key: ${{ steps.lfs-key.outputs.key }} + steps: + - name: Check out repository + uses: actions/checkout@v4 + with: + submodules: recursive + lfs: false + + - name: Compute LFS cache key + id: lfs-key + run: | + # Hash pointer files before they get replaced by git lfs pull + # Use find for reliable recursive globbing, sort for deterministic order + hash=$(find datasets/v1-compressed datasets/extended-compressed -name '*.tamp' -type f | sort | xargs cat | sha256sum | cut -d' ' -f1) + echo "key=lfs-${hash}" >> $GITHUB_OUTPUT + echo "Cache key: lfs-${hash}" + + - name: Restore LFS cache + uses: actions/cache/restore@v4 + id: lfs-cache + with: + path: .git/lfs + key: ${{ steps.lfs-key.outputs.key }} + + - name: Pull LFS files + if: steps.lfs-cache.outputs.cache-hit != 'true' + run: git lfs pull + + - name: Save LFS cache + if: steps.lfs-cache.outputs.cache-hit != 'true' + uses: actions/cache/save@v4 + with: + path: .git/lfs + key: ${{ steps.lfs-key.outputs.key }} + test: name: 'Test Python ${{ matrix.python-version }}' + needs: cache-lfs timeout-minutes: 15 runs-on: ubuntu-latest strategy: @@ -112,13 +152,12 @@ jobs: submodules: recursive lfs: false - - name: Cache LFS files - uses: actions/cache@v4 - id: lfs-cache + - name: Restore LFS cache + uses: actions/cache/restore@v4 with: path: .git/lfs - key: lfs-${{ hashFiles('datasets/v1-compressed/**', 'datasets/extended-compressed/**') }} - restore-keys: lfs- + key: ${{ needs.cache-lfs.outputs.cache-key }} + fail-on-cache-miss: true - name: Pull LFS files run: git lfs pull From ae9e8ffe1f3f3ca07b41c75cf58d2497bedde12b Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 4 Feb 2026 14:09:35 -0500 Subject: [PATCH 072/109] stop testing the viper implementation. 
--- tests/test_compressor.py | 6 ------ tests/test_compressor_decompressor.py | 11 +++-------- tests/test_decompressor.py | 11 +++++++---- tests/test_pseudorandom.py | 6 ------ 4 files changed, 10 insertions(+), 24 deletions(-) diff --git a/tests/test_compressor.py b/tests/test_compressor.py index eeeff9db..309dcd78 100644 --- a/tests/test_compressor.py +++ b/tests/test_compressor.py @@ -36,12 +36,6 @@ NativeExcessBitsError = ExcessBitsError if micropython: - from tamp.compressor_viper import Compressor as ViperCompressor - from tamp.compressor_viper import compress as viper_compress - - Compressors.append(ViperCompressor) - compresses.append(viper_compress) - try: from tamp_native import Compressor as NativeCompressor from tamp_native import ExcessBitsError as NativeExcessBitsError diff --git a/tests/test_compressor_decompressor.py b/tests/test_compressor_decompressor.py index 22b964ad..aa0a8e1e 100644 --- a/tests/test_compressor_decompressor.py +++ b/tests/test_compressor_decompressor.py @@ -19,21 +19,16 @@ CCompressor = None CDecompressor = None - ViperCompressor = None - ViperDecompressor = None NativeCompressor = None NativeDecompressor = None else: - # MicroPython: only test Viper and Native implementations + # MicroPython: only test Native implementation # Pure Python and Cython implementations use CPython-specific features PyCompressor = None PyDecompressor = None CCompressor = None CDecompressor = None - from tamp.compressor_viper import Compressor as ViperCompressor - from tamp.decompressor_viper import Decompressor as ViperDecompressor - try: from tamp_native import Compressor as NativeCompressor from tamp_native import Decompressor as NativeDecompressor @@ -43,8 +38,8 @@ NativeDecompressor = None -Compressors = (PyCompressor, CCompressor, ViperCompressor, NativeCompressor) -Decompressors = (PyDecompressor, CDecompressor, ViperDecompressor, NativeDecompressor) +Compressors = (PyCompressor, CCompressor, NativeCompressor) +Decompressors = (PyDecompressor, 
CDecompressor, NativeDecompressor) def walk_compressors_decompressors(): diff --git a/tests/test_decompressor.py b/tests/test_decompressor.py index 26e9d2fd..df4cc0b0 100644 --- a/tests/test_decompressor.py +++ b/tests/test_decompressor.py @@ -26,11 +26,14 @@ pass else: - from tamp.decompressor_viper import Decompressor as ViperDecompressor - from tamp.decompressor_viper import decompress as viper_decompress + try: + from tamp_native import Decompressor as NativeDecompressor + from tamp_native import decompress as native_decompress - Decompressors.append(ViperDecompressor) - decompresses.append(viper_decompress) + Decompressors.append(NativeDecompressor) + decompresses.append(native_decompress) + except ImportError: + pass class TestDecompressor(unittest.TestCase): diff --git a/tests/test_pseudorandom.py b/tests/test_pseudorandom.py index f2ca6f54..62d0e756 100644 --- a/tests/test_pseudorandom.py +++ b/tests/test_pseudorandom.py @@ -12,12 +12,6 @@ micropython = None if micropython: - import tamp.compressor_viper - import tamp.decompressor_viper - - modules.append(tamp.compressor_viper) - modules.append(tamp.decompressor_viper) - try: import tamp_native From 4392f2b805f38c271982f3afeff5abbc04269069 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 4 Feb 2026 14:43:14 -0500 Subject: [PATCH 073/109] skip the dataset tests when building wheels --- .github/workflows/build_wheels.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build_wheels.yaml b/.github/workflows/build_wheels.yaml index 2f9bb482..17e29e85 100644 --- a/.github/workflows/build_wheels.yaml +++ b/.github/workflows/build_wheels.yaml @@ -117,7 +117,7 @@ jobs: CIBW_ARCHS: ${{ matrix.cibw_archs }} CIBW_BUILD: ${{ matrix.cibw_build }} CIBW_TEST_REQUIRES: pytest - CIBW_TEST_COMMAND: pytest {package}/tests + CIBW_TEST_COMMAND: pytest {package}/tests --no-dataset - uses: actions/upload-artifact@v4 with: @@ -164,7 +164,7 @@ jobs: CIBW_ARCHS: ${{ 
matrix.cibw_archs }} CIBW_BUILD: ${{ matrix.cibw_build }} CIBW_TEST_REQUIRES: pytest - CIBW_TEST_COMMAND: pytest {package}/tests + CIBW_TEST_COMMAND: pytest {package}/tests --no-dataset - uses: actions/upload-artifact@v4 with: @@ -212,7 +212,7 @@ jobs: CIBW_ARCHS: ${{ matrix.cibw_archs }} CIBW_BUILD: ${{ matrix.cibw_build }} CIBW_TEST_REQUIRES: pytest - CIBW_TEST_COMMAND: pytest {package}/tests + CIBW_TEST_COMMAND: pytest {package}/tests --no-dataset - uses: actions/upload-artifact@v4 with: @@ -254,7 +254,7 @@ jobs: CIBW_ARCHS: ${{ matrix.cibw_archs }} CIBW_BUILD: ${{ matrix.cibw_build }} CIBW_TEST_REQUIRES: pytest - CIBW_TEST_COMMAND: pytest {package}/tests + CIBW_TEST_COMMAND: pytest {package}/tests --no-dataset - uses: actions/upload-artifact@v4 with: @@ -302,7 +302,7 @@ jobs: CIBW_ARCHS: ${{ matrix.cibw_archs }} CIBW_BUILD: ${{ matrix.cibw_build }} CIBW_TEST_REQUIRES: pytest - CIBW_TEST_COMMAND: pytest {package}/tests + CIBW_TEST_COMMAND: pytest {package}/tests --no-dataset - uses: actions/upload-artifact@v4 with: @@ -344,7 +344,7 @@ jobs: CIBW_ARCHS: ${{ matrix.cibw_archs }} CIBW_BUILD: ${{ matrix.cibw_build }} CIBW_TEST_REQUIRES: pytest - CIBW_TEST_COMMAND: pytest {package}/tests + CIBW_TEST_COMMAND: pytest {package}/tests --no-dataset - uses: actions/upload-artifact@v4 with: @@ -385,7 +385,7 @@ jobs: CIBW_ARCHS: ${{ matrix.cibw_archs }} CIBW_BUILD: ${{ matrix.cibw_build }} CIBW_TEST_REQUIRES: pytest - CIBW_TEST_COMMAND: pytest {package}/tests + CIBW_TEST_COMMAND: pytest {package}/tests --no-dataset - uses: actions/upload-artifact@v4 with: From 528077044d3f53e7a98d512d9da33b0d97e7acf7 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 4 Feb 2026 14:58:47 -0500 Subject: [PATCH 074/109] update expected enwik8 hash --- .github/workflows/tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index f7d50656..79071a9b 100644 --- a/.github/workflows/tests.yaml +++ 
b/.github/workflows/tests.yaml @@ -267,7 +267,7 @@ jobs: implementation: [desktop, embedded] env: POETRY_HOME: '~/poetry' - EXPECTED_COMPRESSED_HASH: '02e05af059a0040d641988075cf1dfc479a084f9a34b5c8a348354211c5fa038' + EXPECTED_COMPRESSED_HASH: '5996293c04a89014580334da4d69374ee414f1ae5087b813bab69209573b6966' steps: - name: Check out repository From f8ad5d3b672176685d0897147e5c27971964d085 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 4 Feb 2026 21:13:15 -0500 Subject: [PATCH 075/109] Fix window corruption. --- tamp/_c_src/tamp/compressor.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 2f25aeba..3a34a33c 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -291,13 +291,28 @@ static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor *compres if (TAMP_UNLIKELY(res != TAMP_OK)) return res; // Write to window (up to end of buffer, no wrap) + // Handle overlap: when destination is ahead of source and they overlap, + // we must copy in reverse order to avoid reading corrupted data. 
uint16_t remaining = WINDOW_SIZE - compressor->window_pos; uint8_t window_write = MIN(count, remaining); - for (uint8_t i = 0; i < window_write; i++) { - compressor->window[compressor->window_pos] = compressor->window[position + i]; - compressor->window_pos++; + + // Calculate distance from source to destination in circular buffer + const uint16_t src_to_dst = (compressor->window_pos - position) & window_mask; + + if (TAMP_UNLIKELY(src_to_dst < window_write && src_to_dst > 0)) { + // Overlap case: copy in reverse order + for (uint8_t i = window_write; i-- > 0;) { + compressor->window[(compressor->window_pos + i) & window_mask] = compressor->window[position + i]; + } + compressor->window_pos = (compressor->window_pos + window_write) & window_mask; + } else { + // Normal case: forward copy + for (uint8_t i = 0; i < window_write; i++) { + compressor->window[compressor->window_pos] = compressor->window[position + i]; + compressor->window_pos++; + } + compressor->window_pos &= window_mask; } - compressor->window_pos &= window_mask; // Reset extended match state compressor->extended_match_count = 0; @@ -389,7 +404,6 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned } // Extended: Handle RLE accumulation with persistent state - // For simplicity in C, we commit RLE immediately when the run ends if (TAMP_UNLIKELY(compressor->conf.extended)) { uint8_t last_byte = get_last_window_byte(compressor); @@ -483,13 +497,14 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned write_to_bit_buffer(compressor, IS_LITERAL_FLAG | c, compressor->conf.literal + 1); } else { #if TAMP_EXTENDED_COMPRESS - // Extended: Check for extended match + // Extended: Start extended match continuation if (compressor->conf.extended && match_size > compressor->min_pattern_size + 11) { compressor->extended_match_count = match_size; compressor->extended_match_position = match_index; // Consume matched bytes from input compressor->input_pos = 
input_add(match_size); compressor->input_size -= match_size; + // Return - continuation code at start of poll will try to extend or emit return TAMP_OK; } #endif // TAMP_EXTENDED_COMPRESS From ad98ad3e124b9721d7dd611dc309eba6a398c439 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 4 Feb 2026 21:20:09 -0500 Subject: [PATCH 076/109] share window_copy between compressor/decompressor. --- tamp/_c_src/tamp/common.c | 30 +++++++++++++++++++ tamp/_c_src/tamp/common.h | 20 +++++++++++++ tamp/_c_src/tamp/compressor.c | 21 +------------- tamp/_c_src/tamp/decompressor.c | 51 ++------------------------------- 4 files changed, 53 insertions(+), 69 deletions(-) diff --git a/tamp/_c_src/tamp/common.c b/tamp/_c_src/tamp/common.c index f88dd344..b1ef1b9b 100644 --- a/tamp/_c_src/tamp/common.c +++ b/tamp/_c_src/tamp/common.c @@ -38,6 +38,36 @@ int8_t tamp_compute_min_pattern_size(uint8_t window, uint8_t literal) { return 2 + (window > (10 + ((literal - 5) << 1))); } +void tamp_window_copy(unsigned char *window, uint16_t *window_pos, uint16_t window_offset, uint8_t match_size, + uint16_t window_mask) { + /* Calculate distance from source to destination in circular buffer. + * src_to_dst = (dst - src) & mask gives the forward distance. */ + const uint16_t src_to_dst = (*window_pos - window_offset) & window_mask; + + /* Critical overlap case: destination is AHEAD of source and they overlap. + * When dst > src by less than match_size, a forward copy corrupts data because + * we write to positions before reading from them. + * + * Example: src=100, dst=105, match_size=8 + * - Forward copy at i=5 would read window[105], but we already overwrote it at i=0! + * - Must copy in REVERSE order (end to start) to read source bytes before overwriting. + */ + if (TAMP_UNLIKELY(src_to_dst < match_size && src_to_dst > 0)) { + /* Copy in reverse order: start from last byte, work backwards to first byte. + * This ensures we read all overlapping source bytes before they're overwritten. 
+ * Destination wraps via mask; source doesn't need wrapping (pre-validated bounds). */ + for (uint8_t i = match_size; i-- > 0;) { + window[(*window_pos + i) & window_mask] = window[window_offset + i]; + } + *window_pos = (*window_pos + match_size) & window_mask; + } else { + for (uint8_t i = 0; i < match_size; i++) { + window[*window_pos] = window[window_offset + i]; + *window_pos = (*window_pos + 1) & window_mask; + } + } +} + /******************************************************************************* * Built-in I/O handler implementations ******************************************************************************/ diff --git a/tamp/_c_src/tamp/common.h b/tamp/_c_src/tamp/common.h index c33a8cb4..706ce0fa 100644 --- a/tamp/_c_src/tamp/common.h +++ b/tamp/_c_src/tamp/common.h @@ -324,6 +324,26 @@ void tamp_initialize_dictionary(unsigned char *buffer, size_t size); */ int8_t tamp_compute_min_pattern_size(uint8_t window, uint8_t literal); +/** + * @brief Copy pattern from window to window, updating window_pos. + * + * Handles potential overlap between source and destination regions by + * copying backwards when the destination would "catch up" to the source. + * + * IMPORTANT: Caller must validate that (window_offset + match_size) does not + * exceed window bounds before calling this function. This function assumes + * window_offset and match_size are pre-validated and does not perform + * bounds checking on source reads. 
+ * + * @param window Circular buffer (size must be power of 2) + * @param window_pos Current write position (updated by this function) + * @param window_offset Source position to copy from + * @param match_size Number of bytes to copy + * @param window_mask Bitmask for wrapping (window_size - 1) + */ +void tamp_window_copy(unsigned char *window, uint16_t *window_pos, uint16_t window_offset, uint8_t match_size, + uint16_t window_mask); + #ifdef __cplusplus } #endif diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 3a34a33c..6d98086b 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -291,28 +291,9 @@ static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor *compres if (TAMP_UNLIKELY(res != TAMP_OK)) return res; // Write to window (up to end of buffer, no wrap) - // Handle overlap: when destination is ahead of source and they overlap, - // we must copy in reverse order to avoid reading corrupted data. uint16_t remaining = WINDOW_SIZE - compressor->window_pos; uint8_t window_write = MIN(count, remaining); - - // Calculate distance from source to destination in circular buffer - const uint16_t src_to_dst = (compressor->window_pos - position) & window_mask; - - if (TAMP_UNLIKELY(src_to_dst < window_write && src_to_dst > 0)) { - // Overlap case: copy in reverse order - for (uint8_t i = window_write; i-- > 0;) { - compressor->window[(compressor->window_pos + i) & window_mask] = compressor->window[position + i]; - } - compressor->window_pos = (compressor->window_pos + window_write) & window_mask; - } else { - // Normal case: forward copy - for (uint8_t i = 0; i < window_write; i++) { - compressor->window[compressor->window_pos] = compressor->window[position + i]; - compressor->window_pos++; - } - compressor->window_pos &= window_mask; - } + tamp_window_copy(compressor->window, &compressor->window_pos, position, window_write, window_mask); // Reset extended match state 
compressor->extended_match_count = 0; diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index a17cb1d3..23ab4ed1 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -33,53 +33,6 @@ static const uint8_t HUFFMAN_TABLE[128] = { 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17}; -/** - * @brief Copy pattern from window to window, updating window_pos. - * - * Handles potential overlap between source and destination regions by - * copying backwards when the destination would "catch up" to the source. - * - * IMPORTANT: Caller must validate that (window_offset + match_size) does not - * exceed window bounds before calling this function. This function assumes - * window_offset and match_size are pre-validated and does not perform - * bounds checking on source reads. - * - * @param window Circular buffer (size must be power of 2) - * @param window_pos Current write position (updated by this function) - * @param window_offset Source position to copy from - * @param match_size Number of bytes to copy - * @param window_mask Bitmask for wrapping (window_size - 1) - */ -TAMP_NOINLINE static void window_copy(unsigned char* window, uint16_t* window_pos, uint16_t window_offset, - uint8_t match_size, uint16_t window_mask) { - /* Calculate distance from source to destination in circular buffer. - * src_to_dst = (dst - src) & mask gives the forward distance. */ - const uint16_t src_to_dst = (*window_pos - window_offset) & window_mask; - - /* Critical overlap case: destination is AHEAD of source and they overlap. - * When dst > src by less than match_size, a forward copy corrupts data because - * we write to positions before reading from them. - * - * Example: src=100, dst=105, match_size=8 - * - Forward copy at i=5 would read window[105], but we already overwrote it at i=0! 
- * - Must copy in REVERSE order (end to start) to read source bytes before overwriting. - */ - if (TAMP_UNLIKELY(src_to_dst < match_size && src_to_dst > 0)) { - /* Copy in reverse order: start from last byte, work backwards to first byte. - * This ensures we read all overlapping source bytes before they're overwritten. - * Destination wraps via mask; source doesn't need wrapping (pre-validated bounds). */ - for (uint8_t i = match_size; i-- > 0;) { - window[(*window_pos + i) & window_mask] = window[window_offset + i]; - } - *window_pos = (*window_pos + match_size) & window_mask; - } else { - for (uint8_t i = 0; i < match_size; i++) { - window[*window_pos] = window[window_offset + i]; - *window_pos = (*window_pos + 1) & window_mask; - } - } -} - /** * @brief Decode huffman symbol + optional trailing bits from bit buffer. * @@ -292,7 +245,7 @@ static tamp_res decode_extended_match(TampDecompressor* d, unsigned char** outpu uint16_t wp = d->window_pos; uint16_t remaining = window_size - wp; uint8_t window_write = (match_size < remaining) ? match_size : remaining; - window_copy(d->window, &wp, window_offset, window_write, window_size - 1); + tamp_window_copy(d->window, &wp, window_offset, window_write, window_size - 1); d->window_pos = wp; } @@ -543,7 +496,7 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor* decompressor, unsigne if (TAMP_LIKELY(decompressor->skip_bytes == 0)) { uint16_t wp = decompressor->window_pos; - window_copy(decompressor->window, &wp, window_offset, match_size, window_mask); + tamp_window_copy(decompressor->window, &wp, window_offset, match_size, window_mask); decompressor->window_pos = wp; } } From 4bd7b10a83553ba14d9d7a76a8291290229e63ef Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Wed, 4 Feb 2026 22:21:58 -0500 Subject: [PATCH 077/109] cache-on-the-stack unpacked bitfields. 
--- README.md | 10 +++++----- tamp/_c_src/tamp/compressor.c | 22 ++++++++++++++-------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index ef66b64f..971a760d 100644 --- a/README.md +++ b/README.md @@ -351,11 +351,11 @@ Numbers reported in bytes. Tamp sizes were measured using `arm-none-eabi-gcc` | | Compressor | Decompressor | Compressor + Decompressor | | -------------------------------- | ---------- | ------------ | ------------------------- | -| Tamp (MicroPython Native) | 4708 | 4339 | 8208 | -| Tamp (C, no extended, no stream) | 1596 | 1584 | 3060 | -| Tamp (C, no extended) | 2000 | 2036 | 3916 | -| Tamp (C, extended, no stream) | 2736 | 2436 | 5052 | -| Tamp (C, extended) | 3140 | 2888 | 5908 | +| Tamp (MicroPython Native) | 4708 | 4339 | 8124 | +| Tamp (C, no extended, no stream) | 1708 | 1584 | 3088 | +| Tamp (C, no extended) | 2112 | 2036 | 3944 | +| Tamp (C, extended, no stream) | 2768 | 2436 | 5000 | +| Tamp (C, extended) | 3172 | 2888 | 5856 | | Heatshrink (C) | 2956 | 3876 | 6832 | | uzlib (C) | 2355 | 3963 | 6318 | diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 6d98086b..6d889a8b 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -306,7 +306,13 @@ static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor *compres TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned char *output, size_t output_size, size_t *output_written_size) { tamp_res res; - const uint16_t window_mask = (1 << compressor->conf.window) - 1; + // Cache bitfield values for faster access in hot path + const uint8_t conf_window = compressor->conf.window; + const uint8_t conf_literal = compressor->conf.literal; + const uint16_t window_mask = (1 << conf_window) - 1; +#if TAMP_EXTENDED_COMPRESS + const bool conf_extended = compressor->conf.extended; +#endif size_t output_written_size_proxy; if (!output_written_size) output_written_size = 
&output_written_size_proxy; @@ -331,7 +337,7 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned #if TAMP_EXTENDED_COMPRESS // Extended: Handle extended match continuation - if (TAMP_UNLIKELY(compressor->conf.extended && compressor->extended_match_count)) { + if (TAMP_UNLIKELY(conf_extended && compressor->extended_match_count)) { // We're in extended match mode - try to extend the match at the current position const uint8_t max_ext_match = compressor->min_pattern_size + 11 + EXTENDED_MATCH_MAX_EXTRA; const unsigned char *window = compressor->window; @@ -385,7 +391,7 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned } // Extended: Handle RLE accumulation with persistent state - if (TAMP_UNLIKELY(compressor->conf.extended)) { + if (TAMP_UNLIKELY(conf_extended)) { uint8_t last_byte = get_last_window_byte(compressor); // Count and CONSUME matching bytes @@ -472,14 +478,14 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned // Write LITERAL match_size = 1; unsigned char c = read_input(0); - if (TAMP_UNLIKELY(c >> compressor->conf.literal)) { + if (TAMP_UNLIKELY(c >> conf_literal)) { return TAMP_EXCESS_BITS; } - write_to_bit_buffer(compressor, IS_LITERAL_FLAG | c, compressor->conf.literal + 1); + write_to_bit_buffer(compressor, (1 << conf_literal) | c, conf_literal + 1); } else { #if TAMP_EXTENDED_COMPRESS // Extended: Start extended match continuation - if (compressor->conf.extended && match_size > compressor->min_pattern_size + 11) { + if (conf_extended && match_size > compressor->min_pattern_size + 11) { compressor->extended_match_count = match_size; compressor->extended_match_position = match_index; // Consume matched bytes from input @@ -491,8 +497,8 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned #endif // TAMP_EXTENDED_COMPRESS // Write TOKEN (huffman code + window position) uint8_t huffman_index = match_size - 
compressor->min_pattern_size; - write_to_bit_buffer(compressor, (huffman_codes[huffman_index] << compressor->conf.window) | match_index, - huffman_bits[huffman_index] + compressor->conf.window); + write_to_bit_buffer(compressor, (huffman_codes[huffman_index] << conf_window) | match_index, + huffman_bits[huffman_index] + conf_window); } // Populate Window for (uint8_t i = 0; i < match_size; i++) { From c582fb757418f145e490338ef83995fa5c13d08e Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 10:49:00 -0500 Subject: [PATCH 078/109] thorough extended-match search. --- tamp/_c_src/tamp/compressor.c | 67 ++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 6d889a8b..1cc7cae9 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -156,6 +156,56 @@ static inline void find_best_match(TampCompressor *compressor, uint16_t *match_i #endif +#if TAMP_EXTENDED_COMPRESS +/** + * @brief Search for extended match continuation using implicit pattern comparison. + * + * Searches for pattern: window[current_pos:current_pos+current_count] + input[0...] + * starting from current_pos. Uses implicit comparison - no buffer allocation. 
+ * + * @param[in] compressor TampCompressor object + * @param[in] current_pos Current match position in window (also search start) + * @param[in] current_count Current match length + * @param[out] new_pos Position of found longer match + * @param[out] new_count Length of found match + */ +static inline void find_extended_match(TampCompressor *compressor, uint16_t current_pos, uint8_t current_count, + uint16_t *new_pos, uint8_t *new_count) { + *new_count = 0; + const unsigned char *window = compressor->window; + const uint16_t window_size = WINDOW_SIZE; + const uint8_t max_pattern = MIN(current_count + compressor->input_size, MAX_PATTERN_SIZE); + + // Need at least 2 bytes in target to search + if (max_pattern < 2) return; + + // First two bytes of pattern (from window at current_pos) + const uint8_t first_byte = window[current_pos]; + const uint8_t second_byte = window[current_pos + 1]; + + for (uint16_t cand = current_pos; cand + max_pattern <= window_size; cand++) { + // Quick 2-byte check + if (TAMP_LIKELY(window[cand] != first_byte)) continue; + if (TAMP_LIKELY(window[cand + 1] != second_byte)) continue; + + // Extend match using implicit comparison + uint8_t match_len = 2; + for (uint8_t i = 2; i < max_pattern; i++) { + // Get target byte: from window if i < current_count, else from input + uint8_t target = (i < current_count) ? window[current_pos + i] : read_input(i - current_count); + if (window[cand + i] != target) break; + match_len = i + 1; + } + + if (match_len > *new_count) { + *new_count = match_len; + *new_pos = cand; + if (match_len == max_pattern) return; + } + } +} +#endif // TAMP_EXTENDED_COMPRESS + #if TAMP_LAZY_MATCHING /** * @brief Check if writing a single byte will overlap with a future match section. 
@@ -376,7 +426,22 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned compressor->input_size--; // Continue to next iteration to try extending further } else { - // Match ended - emit current match + // O(1) extension failed - search for longer match from current position + uint16_t new_pos; + uint8_t new_count; + find_extended_match(compressor, current_pos, current_count, &new_pos, &new_count); + + if (new_count > current_count) { + // Found longer match - update and continue + uint8_t extra_bytes = new_count - current_count; + compressor->extended_match_position = new_pos; + compressor->extended_match_count = new_count; + compressor->input_pos = input_add(extra_bytes); + compressor->input_size -= extra_bytes; + continue; + } + + // No better match - emit current match // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; size_t token_bytes; From f016879d7898492582b8576ccdb5f03b70f3f7e6 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 12:50:40 -0500 Subject: [PATCH 079/109] better RLE vs match tradeoff in C compressor --- tamp/_c_src/tamp/compressor.c | 156 ++++++++++++++++++++-------------- 1 file changed, 93 insertions(+), 63 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 1cc7cae9..d9c4cdb9 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -161,12 +161,13 @@ static inline void find_best_match(TampCompressor *compressor, uint16_t *match_i * @brief Search for extended match continuation using implicit pattern comparison. * * Searches for pattern: window[current_pos:current_pos+current_count] + input[0...] - * starting from current_pos. Uses implicit comparison - no buffer allocation. + * starting from current_pos. 
Returns the longest match found (which may be at + * current_pos itself if O(1) extension works, or at a different position). * * @param[in] compressor TampCompressor object * @param[in] current_pos Current match position in window (also search start) * @param[in] current_count Current match length - * @param[out] new_pos Position of found longer match + * @param[out] new_pos Position of found match (only valid if new_count > current_count) * @param[out] new_count Length of found match */ static inline void find_extended_match(TampCompressor *compressor, uint16_t current_pos, uint8_t current_count, @@ -176,27 +177,46 @@ static inline void find_extended_match(TampCompressor *compressor, uint16_t curr const uint16_t window_size = WINDOW_SIZE; const uint8_t max_pattern = MIN(current_count + compressor->input_size, MAX_PATTERN_SIZE); - // Need at least 2 bytes in target to search - if (max_pattern < 2) return; + // Need at least current_count + 1 to find a longer match, and room in window + if (max_pattern <= current_count) return; + if (current_pos + current_count + 1 > window_size) return; // First two bytes of pattern (from window at current_pos) const uint8_t first_byte = window[current_pos]; const uint8_t second_byte = window[current_pos + 1]; - for (uint16_t cand = current_pos; cand + max_pattern <= window_size; cand++) { + // The target byte to extend by (input[0], like Python does) + const uint8_t extend_byte = read_input(0); + + // Search candidates that can fit at least current_count + 1 bytes + for (uint16_t cand = current_pos; cand + current_count + 1 <= window_size; cand++) { // Quick 2-byte check if (TAMP_LIKELY(window[cand] != first_byte)) continue; if (TAMP_LIKELY(window[cand + 1] != second_byte)) continue; - // Extend match using implicit comparison - uint8_t match_len = 2; - for (uint8_t i = 2; i < max_pattern; i++) { - // Get target byte: from window if i < current_count, else from input - uint8_t target = (i < current_count) ? 
window[current_pos + i] : read_input(i - current_count); + // Check if all current_count bytes match + bool full_match = true; + for (uint8_t i = 2; i < current_count; i++) { + if (window[cand + i] != window[current_pos + i]) { + full_match = false; + break; + } + } + if (!full_match) continue; + + // Check if the extension byte matches + if (window[cand + current_count] != extend_byte) continue; + + // Found a match of current_count + 1 bytes - now extend as far as possible + const uint8_t cand_max = MIN(max_pattern, window_size - cand); + uint8_t match_len = current_count + 1; + for (uint8_t i = current_count + 1; i < cand_max; i++) { + uint8_t target = read_input(i - current_count); if (window[cand + i] != target) break; match_len = i + 1; } + // Track this match (guaranteed > current_count) if (match_len > *new_count) { *new_count = match_len; *new_pos = cand; @@ -388,9 +408,8 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned #if TAMP_EXTENDED_COMPRESS // Extended: Handle extended match continuation if (TAMP_UNLIKELY(conf_extended && compressor->extended_match_count)) { - // We're in extended match mode - try to extend the match at the current position + // We're in extended match mode - try to extend the match const uint8_t max_ext_match = compressor->min_pattern_size + 11 + EXTENDED_MATCH_MAX_EXTRA; - const unsigned char *window = compressor->window; while (compressor->input_size > 0) { const uint16_t current_pos = compressor->extended_match_position; @@ -418,38 +437,29 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned return TAMP_OK; } - // O(1) extension check: does the next byte at current position match input? 
- if (window[current_pos + current_count] == read_input(0)) { - // Extension successful - consume input byte and increment count - compressor->extended_match_count++; - compressor->input_pos = input_add(1); - compressor->input_size--; - // Continue to next iteration to try extending further - } else { - // O(1) extension failed - search for longer match from current position - uint16_t new_pos; - uint8_t new_count; - find_extended_match(compressor, current_pos, current_count, &new_pos, &new_count); - - if (new_count > current_count) { - // Found longer match - update and continue - uint8_t extra_bytes = new_count - current_count; - compressor->extended_match_position = new_pos; - compressor->extended_match_count = new_count; - compressor->input_pos = input_add(extra_bytes); - compressor->input_size -= extra_bytes; - continue; - } - - // No better match - emit current match - // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) - if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; - size_t token_bytes; - res = write_extended_match_token(compressor, output, output_size, &token_bytes); - (*output_written_size) += token_bytes; - if (TAMP_UNLIKELY(res != TAMP_OK)) return res; - return TAMP_OK; + // Search for longer match (includes O(1) extension at same position) + uint16_t new_pos; + uint8_t new_count; + find_extended_match(compressor, current_pos, current_count, &new_pos, &new_count); + + if (new_count > current_count) { + // Found longer match - update and continue + uint8_t extra_bytes = new_count - current_count; + compressor->extended_match_position = new_pos; + compressor->extended_match_count = new_count; + compressor->input_pos = input_add(extra_bytes); + compressor->input_size -= extra_bytes; + continue; } + + // No longer match found - emit current match + // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) + if (TAMP_UNLIKELY(output_size < 
EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; + size_t token_bytes; + res = write_extended_match_token(compressor, output, output_size, &token_bytes); + (*output_written_size) += token_bytes; + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + return TAMP_OK; } // Ran out of input while extending - return and wait for more return TAMP_OK; @@ -459,32 +469,52 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned if (TAMP_UNLIKELY(conf_extended)) { uint8_t last_byte = get_last_window_byte(compressor); - // Count and CONSUME matching bytes - while (compressor->input_size > 0 && compressor->rle_count < RLE_MAX_COUNT) { - if (read_input(0) == last_byte) { - compressor->rle_count++; - compressor->input_pos = input_add(1); - compressor->input_size--; - } else { - break; - } + // Count RLE bytes in current buffer WITHOUT consuming yet + uint8_t rle_available = 0; + while (rle_available < compressor->input_size && compressor->rle_count + rle_available < RLE_MAX_COUNT && + compressor->input[input_add(rle_available)] == last_byte) { + rle_available++; } - // If we consumed whole buffer and haven't hit max, return (accumulate more) - if (compressor->input_size == 0 && compressor->rle_count < RLE_MAX_COUNT && compressor->rle_count > 0) { + uint8_t total_rle = compressor->rle_count + rle_available; + bool rle_ended = (rle_available < compressor->input_size) || (total_rle >= RLE_MAX_COUNT); + + // If RLE hasn't ended and we haven't hit max, consume and wait for more + if (!rle_ended && total_rle > 0) { + compressor->rle_count = total_rle; + compressor->input_pos = input_add(rle_available); + compressor->input_size -= rle_available; return TAMP_OK; } - // RLE run has ended - if (compressor->rle_count >= 2) { - // Commit the RLE (simplified approach for C) - write_rle_token(compressor, compressor->rle_count); + // RLE run has ended - decide between RLE and pattern match + if (total_rle >= 2) { + bool use_pattern = false; + + // For short RLE 
runs (all from this call), check if pattern match is better + if (total_rle == rle_available && total_rle <= 6) { + uint16_t pattern_index; + uint8_t pattern_size; + find_best_match(compressor, &pattern_index, &pattern_size); + + if (pattern_size > total_rle) { + use_pattern = true; + // Don't consume RLE bytes - fall through to pattern matching + } + } + + if (!use_pattern) { + // Use RLE - consume bytes and write token + compressor->input_pos = input_add(rle_available); + compressor->input_size -= rle_available; + write_rle_token(compressor, total_rle); + compressor->rle_count = 0; + return TAMP_OK; + } compressor->rle_count = 0; - return TAMP_OK; - } else if (compressor->rle_count == 1) { - // Single byte - push it back to input for normal literal encoding - compressor->input_pos = input_add(-1); - compressor->input_size++; + } else if (total_rle == 1) { + // Single byte - not worth RLE, will be handled as literal/pattern + // Byte is still in input buffer (not consumed), just reset RLE state compressor->rle_count = 0; } } From 57f0c48b35111b7f80ea2247fbc4db6e7bae27ed Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 13:06:14 -0500 Subject: [PATCH 080/109] match python compressor to C compressor implementation. 
--- tamp/compressor.py | 100 +++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 63 deletions(-) diff --git a/tamp/compressor.py b/tamp/compressor.py index 2451aa75..f59b2e39 100644 --- a/tamp/compressor.py +++ b/tamp/compressor.py @@ -273,90 +273,64 @@ def _compress_input_buffer_single(self) -> int: # RLE handling with persistent state (v2 only) # Accumulate RLE count across compression cycles for better compression of long runs - have_match_from_rle = False # Track if we already did pattern matching in RLE section - if self.extended: last_byte = self._window_buffer.last_written_byte - # Count additional matching bytes in current buffer - new_rle_bytes = 0 + # Count RLE bytes in current buffer WITHOUT consuming yet + rle_available = 0 for byte in self._input_buffer: - if byte == last_byte and self._rle_count + new_rle_bytes < self._rle_max_size: - new_rle_bytes += 1 + if byte == last_byte and self._rle_count + rle_available < self._rle_max_size: + rle_available += 1 else: break - # If we consumed whole buffer and haven't hit max, keep accumulating - if new_rle_bytes == len(self._input_buffer) and self._rle_count + new_rle_bytes < self._rle_max_size: - # Consume these bytes and wait for more data - for _ in range(new_rle_bytes): + total_rle = self._rle_count + rle_available + rle_ended = (rle_available < len(self._input_buffer)) or (total_rle >= self._rle_max_size) + + # If RLE hasn't ended and we haven't hit max, consume and wait for more + if not rle_ended and total_rle > 0: + self._rle_count = total_rle + for _ in range(rle_available): self._input_buffer.popleft() - self._rle_count += new_rle_bytes return bytes_written - # RLE run has ended or hit max - decide what to encode - total_rle_count = self._rle_count + new_rle_bytes + # RLE run has ended - decide between RLE and pattern match + if total_rle >= 2: + use_pattern = False - if total_rle_count >= 2: - # Build search target: accumulated RLE + new RLE + rest of buffer - # This 
allows pattern matching to find longer sequences - target = bytes([last_byte]) * total_rle_count + bytes(list(self._input_buffer)[new_rle_bytes:]) - - # Do pattern search - if self.lazy_matching and self._cached_match_index >= 0: - search_i = self._cached_match_index - match_size = self._cached_match_size - match = self._window_buffer.get(search_i, match_size) - self._cached_match_index = -1 - else: + # For short RLE runs (all from this call), check if pattern match is better + if total_rle == rle_available and total_rle <= 6: + target = bytes(self._input_buffer) search_i, match = self._search(target, start=0) match_size = len(match) - have_match_from_rle = True + if match_size > total_rle: + use_pattern = True + # Don't consume RLE bytes - fall through to pattern matching - # Simple decision: if pattern match is longer, use it; otherwise use RLE - if match_size >= self.min_pattern_size and match_size > total_rle_count: - # Pattern match wins - # Consume bytes from input_buffer (only the bytes actually in the buffer) - # Note: first _rle_count bytes were already consumed in previous calls - bytes_to_consume = match_size - self._rle_count - for _ in range(bytes_to_consume): - self._input_buffer.popleft() - self._rle_count = 0 - - # Write the pattern match immediately and return - # (Don't continue to normal flow which would try to consume bytes again) - if self.extended and match_size > (self.min_pattern_size + 11): - self._extended_match_position = search_i - self._extended_match_count = match_size - bytes_written += self._write_extended_match() - else: - bytes_written += self._write_match(search_i, match) - return bytes_written - else: - # RLE wins - commit RLE - for _ in range(new_rle_bytes): + if not use_pattern: + # Use RLE - consume bytes and write token + for _ in range(rle_available): self._input_buffer.popleft() - self._rle_count = total_rle_count + self._rle_count = total_rle bytes_written += self._write_rle() return bytes_written - elif self._rle_count == 
1: - # Single byte isn't worth RLE encoding self._rle_count = 0 - # Fall through to normal pattern matching + elif total_rle == 1: + # Single byte - not worth RLE, will be handled as literal/pattern + self._rle_count = 0 - # Normal pattern matching (when no RLE or RLE was abandoned for pattern) - if not have_match_from_rle: - target = bytes(self._input_buffer) + # Normal pattern matching + target = bytes(self._input_buffer) - if self.lazy_matching and self._cached_match_index >= 0: - search_i = self._cached_match_index - match_size = self._cached_match_size - match = self._window_buffer.get(search_i, match_size) - self._cached_match_index = -1 - else: - search_i, match = self._search(target, start=0) - match_size = len(match) + if self.lazy_matching and self._cached_match_index >= 0: + search_i = self._cached_match_index + match_size = self._cached_match_size + match = self._window_buffer.get(search_i, match_size) + self._cached_match_index = -1 + else: + search_i, match = self._search(target, start=0) + match_size = len(match) # Lazy matching logic if ( From 161e47674dcfabd26d856e1d815612ab6c872942 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 13:14:49 -0500 Subject: [PATCH 081/109] update readme and expected hash --- .github/workflows/tests.yaml | 2 +- README.md | 60 ++++++++++++++++++------------------ 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 79071a9b..1c44aa9f 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -267,7 +267,7 @@ jobs: implementation: [desktop, embedded] env: POETRY_HOME: '~/poetry' - EXPECTED_COMPRESSED_HASH: '5996293c04a89014580334da4d69374ee414f1ae5087b813bab69209573b6966' + EXPECTED_COMPRESSED_HASH: 'dd5e431b0cbaa6ee001b10493c2b08e6235f42e3f4ce00958e88b1b97581872e' steps: - name: Check out repository diff --git a/README.md b/README.md index 971a760d..39af81c3 100644 --- a/README.md +++ b/README.md @@ -219,20 
+219,20 @@ idea of how these algorithms perform over a variety of input data types. | dataset | raw | tamp | tamp (LazyMatching) | zlib | heatshrink | | --------------- | ----------- | ----------- | ------------------- | ------------- | ---------- | -| enwik8 | 100,000,000 | 51,116,968 | **50,725,098** | 56,205,166 | 56,110,394 | -| RPI_PICO (.uf2) | 667,648 | **288,704** | 289,735 | 303,763 | - | -| silesia/dickens | 10,192,446 | 5,538,712 | **5,503,021** | 6,049,169 | 6,155,768 | -| silesia/mozilla | 51,220,480 | 24,499,954 | **24,311,290** | 25,104,966 | 25,435,908 | -| silesia/mr | 9,970,564 | 4,524,424 | **4,396,124** | 4,864,734 | 5,442,180 | -| silesia/nci | 33,553,445 | 7,093,354 | 7,003,632 | **5,765,521** | 8,247,487 | -| silesia/ooffice | 6,152,192 | 3,779,238 | **3,763,795** | 4,077,277 | 3,994,589 | -| silesia/osdb | 10,085,684 | 8,467,407 | **8,452,497** | 8,625,159 | 8,747,527 | -| silesia/reymont | 6,627,202 | 2,825,458 | **2,793,473** | 2,897,661 | 2,910,251 | -| silesia/samba | 21,606,400 | 8,443,932 | **8,395,048** | 8,862,423 | 9,223,827 | -| silesia/sao | 7,251,944 | 6,136,102 | **6,100,071** | 6,506,417 | 6,400,926 | -| silesia/webster | 41,458,703 | 18,259,149 | **18,118,788** | 20,212,235 | 19,942,817 | -| silesia/x-ray | 8,474,240 | 7,509,652 | 7,404,794 | **7,351,750** | 8,059,723 | -| silesia/xml | 5,345,280 | 1,493,131 | **1,473,832** | 1,586,985 | 1,665,179 | +| enwik8 | 100,000,000 | 51,017,102 | **50,626,118** | 56,205,166 | 56,110,394 | +| RPI_PICO (.uf2) | 667,648 | **289,204** | 290,442 | 303,763 | - | +| silesia/dickens | 10,192,446 | 5,538,353 | **5,502,834** | 6,049,169 | 6,155,768 | +| silesia/mozilla | 51,220,480 | 24,412,662 | **24,228,654** | 25,104,966 | 25,435,908 | +| silesia/mr | 9,970,564 | 4,519,402 | **4,393,009** | 4,864,734 | 5,442,180 | +| silesia/nci | 33,553,445 | 6,824,403 | 6,772,379 | **5,765,521** | 8,247,487 | +| silesia/ooffice | 6,152,192 | 3,773,089 | **3,755,153** | 4,077,277 | 3,994,589 | +| 
silesia/osdb | 10,085,684 | 8,466,875 | **8,464,328** | 8,625,159 | 8,747,527 | +| silesia/reymont | 6,627,202 | 2,818,554 | **2,788,774** | 2,897,661 | 2,910,251 | +| silesia/samba | 21,606,400 | 8,384,183 | **8,345,616** | 8,862,423 | 9,223,827 | +| silesia/sao | 7,251,944 | 6,136,077 | **6,100,061** | 6,506,417 | 6,400,926 | +| silesia/webster | 41,458,703 | 18,146,649 | **18,010,980** | 20,212,235 | 19,942,817 | +| silesia/x-ray | 8,474,240 | 7,509,449 | 7,404,794 | **7,351,750** | 8,059,723 | +| silesia/xml | 5,345,280 | 1,473,463 | **1,455,877** | 1,586,985 | 1,665,179 | Tamp outperforms both heatshrink and zlib on most datasets, winning 12 out of 14 benchmarks. This is while using around 10x less memory than zlib during both @@ -270,20 +270,20 @@ compression parameters across all benchmark datasets (`window=10`, `literal=8`). | dataset | raw | Baseline | +lazy | +extended | +lazy +extended | | --------------- | ----------- | ---------- | ------------------ | ------------------ | ------------------ | -| enwik8 | 100,000,000 | 51,635,633 | 51,252,113 (−0.7%) | 51,116,968 (−1.0%) | 50,725,098 (−1.8%) | -| RPI_PICO (.uf2) | 667,648 | 331,310 | 329,875 (−0.4%) | 288,704 (−12.9%) | 289,735 (−12.5%) | -| silesia/dickens | 10,192,446 | 5,546,761 | 5,511,604 (−0.6%) | 5,538,712 (−0.1%) | 5,503,021 (−0.8%) | -| silesia/mozilla | 51,220,480 | 25,121,385 | 24,936,067 (−0.7%) | 24,499,954 (−2.5%) | 24,311,290 (−3.2%) | -| silesia/mr | 9,970,564 | 5,027,032 | 4,886,272 (−2.8%) | 4,524,424 (−10.0%) | 4,396,124 (−12.6%) | -| silesia/nci | 33,553,445 | 8,643,610 | 8,645,299 (+0.0%) | 7,093,354 (−17.9%) | 7,003,632 (−19.0%) | -| silesia/ooffice | 6,152,192 | 3,814,938 | 3,798,261 (−0.4%) | 3,779,238 (−0.9%) | 3,763,795 (−1.3%) | -| silesia/osdb | 10,085,684 | 8,520,835 | 8,506,443 (−0.2%) | 8,467,407 (−0.6%) | 8,452,497 (−0.8%) | -| silesia/reymont | 6,627,202 | 2,847,981 | 2,820,870 (−1.0%) | 2,825,458 (−0.8%) | 2,793,473 (−1.9%) | -| silesia/samba | 21,606,400 | 9,102,594 | 
9,060,692 (−0.5%) | 8,443,932 (−7.2%) | 8,395,048 (−7.8%) | -| silesia/sao | 7,251,944 | 6,137,755 | 6,101,744 (−0.6%) | 6,136,102 (−0.0%) | 6,100,071 (−0.6%) | -| silesia/webster | 41,458,703 | 18,694,172 | 18,567,228 (−0.7%) | 18,259,149 (−2.3%) | 18,118,788 (−3.1%) | -| silesia/x-ray | 8,474,240 | 7,510,606 | 7,405,814 (−1.4%) | 7,509,652 (−0.0%) | 7,404,794 (−1.4%) | -| silesia/xml | 5,345,280 | 1,681,687 | 1,672,660 (−0.5%) | 1,493,131 (−11.2%) | 1,473,832 (−12.4%) | +| enwik8 | 100,000,000 | 51,635,633 | 51,252,694 (−0.7%) | 51,017,102 (−1.2%) | 50,626,118 (−2.0%) | +| RPI_PICO (.uf2) | 667,648 | 331,310 | 329,893 (−0.4%) | 289,204 (−12.7%) | 290,442 (−12.3%) | +| silesia/dickens | 10,192,446 | 5,546,761 | 5,511,681 (−0.6%) | 5,538,353 (−0.2%) | 5,502,834 (−0.8%) | +| silesia/mozilla | 51,220,480 | 25,121,385 | 24,937,036 (−0.7%) | 24,412,662 (−2.8%) | 24,228,654 (−3.6%) | +| silesia/mr | 9,970,564 | 5,027,032 | 4,888,930 (−2.7%) | 4,519,402 (−10.1%) | 4,393,009 (−12.6%) | +| silesia/nci | 33,553,445 | 8,643,610 | 8,645,399 (+0.0%) | 6,824,403 (−21.0%) | 6,772,379 (−21.6%) | +| silesia/ooffice | 6,152,192 | 3,814,938 | 3,798,393 (−0.4%) | 3,773,089 (−1.1%) | 3,755,153 (−1.6%) | +| silesia/osdb | 10,085,684 | 8,520,835 | 8,518,502 (−0.0%) | 8,466,875 (−0.6%) | 8,464,328 (−0.7%) | +| silesia/reymont | 6,627,202 | 2,847,981 | 2,820,948 (−0.9%) | 2,818,554 (−1.0%) | 2,788,774 (−2.1%) | +| silesia/samba | 21,606,400 | 9,102,594 | 9,061,143 (−0.5%) | 8,384,183 (−7.9%) | 8,345,616 (−8.3%) | +| silesia/sao | 7,251,944 | 6,137,755 | 6,101,747 (−0.6%) | 6,136,077 (−0.0%) | 6,100,061 (−0.6%) | +| silesia/webster | 41,458,703 | 18,694,172 | 18,567,618 (−0.7%) | 18,146,649 (−2.9%) | 18,010,980 (−3.7%) | +| silesia/x-ray | 8,474,240 | 7,510,606 | 7,406,001 (−1.4%) | 7,509,449 (−0.0%) | 7,404,794 (−1.4%) | +| silesia/xml | 5,345,280 | 1,681,687 | 1,672,827 (−0.5%) | 1,473,463 (−12.4%) | 1,455,877 (−13.4%) | The `extended` parameter enables additional Huffman codes for 
longer pattern matches, which significantly improves compression on datasets with many long @@ -354,8 +354,8 @@ Numbers reported in bytes. Tamp sizes were measured using `arm-none-eabi-gcc` | Tamp (MicroPython Native) | 4708 | 4339 | 8124 | | Tamp (C, no extended, no stream) | 1708 | 1584 | 3088 | | Tamp (C, no extended) | 2112 | 2036 | 3944 | -| Tamp (C, extended, no stream) | 2768 | 2436 | 5000 | -| Tamp (C, extended) | 3172 | 2888 | 5856 | +| Tamp (C, extended, no stream) | 3516 | 2436 | 5748 | +| Tamp (C, extended) | 3920 | 2888 | 6604 | | Heatshrink (C) | 2956 | 3876 | 6832 | | uzlib (C) | 2355 | 3963 | 6318 | From c6f23f48019f6fdb35b34ede9f2ffb1f73080ea8 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 13:24:49 -0500 Subject: [PATCH 082/109] don't inline find_best_match --- README.md | 8 ++++---- tamp/_c_src/tamp/compressor.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 39af81c3..2a722f01 100644 --- a/README.md +++ b/README.md @@ -352,10 +352,10 @@ Numbers reported in bytes. 
Tamp sizes were measured using `arm-none-eabi-gcc` | | Compressor | Decompressor | Compressor + Decompressor | | -------------------------------- | ---------- | ------------ | ------------------------- | | Tamp (MicroPython Native) | 4708 | 4339 | 8124 | -| Tamp (C, no extended, no stream) | 1708 | 1584 | 3088 | -| Tamp (C, no extended) | 2112 | 2036 | 3944 | -| Tamp (C, extended, no stream) | 3516 | 2436 | 5748 | -| Tamp (C, extended) | 3920 | 2888 | 6604 | +| Tamp (C, no extended, no stream) | 1620 | 1584 | 3000 | +| Tamp (C, no extended) | 2024 | 2036 | 3856 | +| Tamp (C, extended, no stream) | 3072 | 2436 | 5304 | +| Tamp (C, extended) | 3476 | 2888 | 6160 | | Heatshrink (C) | 2956 | 3876 | 6832 | | uzlib (C) | 2355 | 3963 | 6318 | diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index d9c4cdb9..e08731ef 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -114,7 +114,7 @@ extern void find_best_match(TampCompressor *compressor, uint16_t *match_index, u * @param[out] match_index If match_size is 0, this value is undefined. * @param[out] match_size Size of best found match. 
*/ -static inline void find_best_match(TampCompressor *compressor, uint16_t *match_index, uint8_t *match_size) { +static TAMP_NOINLINE void find_best_match(TampCompressor *compressor, uint16_t *match_index, uint8_t *match_size) { *match_size = 0; if (TAMP_UNLIKELY(compressor->input_size < compressor->min_pattern_size)) return; From aa59de5b8ba1b39ac318dadc50dd3c828d09170d Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 13:27:09 -0500 Subject: [PATCH 083/109] fix initialization warning --- tamp/_c_src/tamp/compressor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index e08731ef..4120681e 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -438,7 +438,7 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned } // Search for longer match (includes O(1) extension at same position) - uint16_t new_pos; + uint16_t new_pos = 0; uint8_t new_count; find_extended_match(compressor, current_pos, current_count, &new_pos, &new_count); From 538b8c0c4347556d679adddc34d66dc8a37b7d23 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 13:40:13 -0500 Subject: [PATCH 084/109] combine if-statement; saving 8 bytes. 
--- tamp/_c_src/tamp/compressor.c | 76 ++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 4120681e..cbc117a1 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -682,47 +682,49 @@ tamp_res tamp_compressor_flush(TampCompressor *compressor, unsigned char *output } #if TAMP_EXTENDED_COMPRESS - // Extended: Flush any pending RLE - if (compressor->conf.extended && compressor->rle_count >= 1) { - // Partial flush first to make room - res = partial_flush(compressor, output, output_size, &chunk_output_written_size); - (*output_written_size) += chunk_output_written_size; - if (TAMP_UNLIKELY(res != TAMP_OK)) return res; - output_size -= chunk_output_written_size; - output += chunk_output_written_size; + if (compressor->conf.extended) { + // Flush any pending RLE + if (compressor->rle_count >= 1) { + // Partial flush first to make room + res = partial_flush(compressor, output, output_size, &chunk_output_written_size); + (*output_written_size) += chunk_output_written_size; + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + output_size -= chunk_output_written_size; + output += chunk_output_written_size; - if (compressor->rle_count == 1) { - // Single byte - write as literal (can't use RLE token for count < 2) - uint8_t literal = get_last_window_byte(compressor); - write_to_bit_buffer(compressor, IS_LITERAL_FLAG | literal, compressor->conf.literal + 1); + if (compressor->rle_count == 1) { + // Single byte - write as literal (can't use RLE token for count < 2) + uint8_t literal = get_last_window_byte(compressor); + write_to_bit_buffer(compressor, IS_LITERAL_FLAG | literal, compressor->conf.literal + 1); - // Write to window - const uint16_t window_mask = (1 << compressor->conf.window) - 1; - compressor->window[compressor->window_pos] = literal; - compressor->window_pos = (compressor->window_pos + 1) & window_mask; - } else { - // count 
>= 2: write as RLE token - write_rle_token(compressor, compressor->rle_count); - } - compressor->rle_count = 0; + // Write to window + const uint16_t window_mask = (1 << compressor->conf.window) - 1; + compressor->window[compressor->window_pos] = literal; + compressor->window_pos = (compressor->window_pos + 1) & window_mask; + } else { + // count >= 2: write as RLE token + write_rle_token(compressor, compressor->rle_count); + } + compressor->rle_count = 0; - // Partial flush again after writing token - res = partial_flush(compressor, output, output_size, &chunk_output_written_size); - (*output_written_size) += chunk_output_written_size; - if (TAMP_UNLIKELY(res != TAMP_OK)) return res; - output_size -= chunk_output_written_size; - output += chunk_output_written_size; - } + // Partial flush again after writing token + res = partial_flush(compressor, output, output_size, &chunk_output_written_size); + (*output_written_size) += chunk_output_written_size; + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + output_size -= chunk_output_written_size; + output += chunk_output_written_size; + } - // Extended: Flush any pending extended match - if (compressor->conf.extended && compressor->extended_match_count) { - // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) - if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; - res = write_extended_match_token(compressor, output, output_size, &chunk_output_written_size); - (*output_written_size) += chunk_output_written_size; - if (TAMP_UNLIKELY(res != TAMP_OK)) return res; - output_size -= chunk_output_written_size; - output += chunk_output_written_size; + // Flush any pending extended match + else if (compressor->extended_match_count) { + // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) + if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; + res = write_extended_match_token(compressor, 
output, output_size, &chunk_output_written_size); + (*output_written_size) += chunk_output_written_size; + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + output_size -= chunk_output_written_size; + output += chunk_output_written_size; + } } #endif // TAMP_EXTENDED_COMPRESS From 66401acb279ca49cb9a2ae3b67719e7a33e671f1 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 14:05:35 -0500 Subject: [PATCH 085/109] save 48 bytes in tamp_compressor_flush using some gotos --- tamp/_c_src/tamp/compressor.c | 147 ++++++++++++++++------------------ 1 file changed, 69 insertions(+), 78 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index cbc117a1..800d6f54 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -40,7 +40,7 @@ static const uint8_t huffman_bits[] = {0x2, 0x3, 0x5, 0x5, 0x6, 0x7, 0x7, 0x7, 0 #define EXTENDED_MATCH_MIN_OUTPUT_BYTES 6 #endif -static TAMP_NOINLINE void write_to_bit_buffer(TampCompressor *compressor, uint32_t bits, uint8_t n_bits) { +static TAMP_NOINLINE void write_to_bit_buffer(TampCompressor* compressor, uint32_t bits, uint8_t n_bits) { compressor->bit_buffer_pos += n_bits; compressor->bit_buffer |= bits << (32 - compressor->bit_buffer_pos); } @@ -55,7 +55,7 @@ static TAMP_NOINLINE void write_to_bit_buffer(TampCompressor *compressor, uint32 * @param[in] value The value to encode. * @param[in] trailing_bits Number of trailing bits (3 for extended match, 4 for RLE). 
*/ -static TAMP_NOINLINE void write_extended_huffman(TampCompressor *compressor, uint8_t value, uint8_t trailing_bits) { +static TAMP_NOINLINE void write_extended_huffman(TampCompressor* compressor, uint8_t value, uint8_t trailing_bits) { uint8_t code_index = value >> trailing_bits; // Write huffman code (without literal flag) + trailing bits in one call write_to_bit_buffer(compressor, (huffman_codes[code_index] << trailing_bits) | (value & ((1 << trailing_bits) - 1)), @@ -69,8 +69,8 @@ static TAMP_NOINLINE void write_extended_huffman(TampCompressor *compressor, uin * * Flushes complete bytes from the bit buffer. Up to 7 bits may remain. */ -static TAMP_NOINLINE tamp_res partial_flush(TampCompressor *compressor, unsigned char *output, size_t output_size, - size_t *output_written_size) { +static TAMP_NOINLINE tamp_res partial_flush(TampCompressor* compressor, unsigned char* output, size_t output_size, + size_t* output_written_size) { for (*output_written_size = output_size; compressor->bit_buffer_pos >= 8 && output_size; output_size--, compressor->bit_buffer_pos -= 8, compressor->bit_buffer <<= 8) *output++ = compressor->bit_buffer >> 24; @@ -78,7 +78,7 @@ static TAMP_NOINLINE tamp_res partial_flush(TampCompressor *compressor, unsigned return (compressor->bit_buffer_pos >= 8) ? 
TAMP_OUTPUT_FULL : TAMP_OK; } -inline bool tamp_compressor_full(const TampCompressor *compressor) { +inline bool tamp_compressor_full(const TampCompressor* compressor) { return compressor->input_size == sizeof(compressor->input); } @@ -99,7 +99,7 @@ inline bool tamp_compressor_full(const TampCompressor *compressor) { */ #if TAMP_ESP32 -extern void find_best_match(TampCompressor *compressor, uint16_t *match_index, uint8_t *match_size); +extern void find_best_match(TampCompressor* compressor, uint16_t* match_index, uint8_t* match_size); #elif (defined(__x86_64__) || defined(__aarch64__) || defined(_M_X64) || defined(_M_ARM64)) && !TAMP_USE_EMBEDDED_MATCH #include "compressor_find_match_desktop.c" @@ -114,7 +114,7 @@ extern void find_best_match(TampCompressor *compressor, uint16_t *match_index, u * @param[out] match_index If match_size is 0, this value is undefined. * @param[out] match_size Size of best found match. */ -static TAMP_NOINLINE void find_best_match(TampCompressor *compressor, uint16_t *match_index, uint8_t *match_size) { +static TAMP_NOINLINE void find_best_match(TampCompressor* compressor, uint16_t* match_index, uint8_t* match_size) { *match_size = 0; if (TAMP_UNLIKELY(compressor->input_size < compressor->min_pattern_size)) return; @@ -123,7 +123,7 @@ static TAMP_NOINLINE void find_best_match(TampCompressor *compressor, uint16_t * const uint8_t second_byte = read_input(1); const uint32_t window_size_minus_1 = WINDOW_SIZE - 1; const uint8_t max_pattern_size = MIN(compressor->input_size, MAX_PATTERN_SIZE); - const unsigned char *window = compressor->window; + const unsigned char* window = compressor->window; for (uint32_t window_index = 0; window_index < window_size_minus_1; window_index++) { if (TAMP_LIKELY(window[window_index] != first_byte)) { @@ -170,10 +170,10 @@ static TAMP_NOINLINE void find_best_match(TampCompressor *compressor, uint16_t * * @param[out] new_pos Position of found match (only valid if new_count > current_count) * @param[out] 
new_count Length of found match */ -static inline void find_extended_match(TampCompressor *compressor, uint16_t current_pos, uint8_t current_count, - uint16_t *new_pos, uint8_t *new_count) { +static inline void find_extended_match(TampCompressor* compressor, uint16_t current_pos, uint8_t current_count, + uint16_t* new_pos, uint8_t* new_count) { *new_count = 0; - const unsigned char *window = compressor->window; + const unsigned char* window = compressor->window; const uint16_t window_size = WINDOW_SIZE; const uint8_t max_pattern = MIN(current_count + compressor->input_size, MAX_PATTERN_SIZE); @@ -241,7 +241,7 @@ static inline bool validate_no_match_overlap(uint16_t write_pos, uint16_t match_ } #endif -tamp_res tamp_compressor_init(TampCompressor *compressor, const TampConf *conf, unsigned char *window) { +tamp_res tamp_compressor_init(TampCompressor* compressor, const TampConf* conf, unsigned char* window) { const TampConf conf_default = { .window = 10, .literal = 8, @@ -261,7 +261,7 @@ tamp_res tamp_compressor_init(TampCompressor *compressor, const TampConf *conf, #endif for (uint8_t i = 0; i < sizeof(TampCompressor); i++) // Zero-out the struct - ((unsigned char *)compressor)[i] = 0; + ((unsigned char*)compressor)[i] = 0; // Build header directly from conf (8 bits total) // Layout: [window:3][literal:2][use_custom_dictionary:1][extended:1][more_headers:1] @@ -287,7 +287,7 @@ tamp_res tamp_compressor_init(TampCompressor *compressor, const TampConf *conf, /** * @brief Get the last byte written to the window. */ -static inline uint8_t get_last_window_byte(TampCompressor *compressor) { +static inline uint8_t get_last_window_byte(TampCompressor* compressor) { uint16_t prev_pos = (compressor->window_pos - 1) & ((1 << compressor->conf.window) - 1); return compressor->window[prev_pos]; } @@ -298,7 +298,7 @@ static inline uint8_t get_last_window_byte(TampCompressor *compressor) { * @param[in,out] compressor Compressor state. 
* @param[in] count Number of repeated bytes (must be >= 2). */ -static TAMP_NOINLINE void write_rle_token(TampCompressor *compressor, uint8_t count) { +static TAMP_NOINLINE void write_rle_token(TampCompressor* compressor, uint8_t count) { const uint16_t window_mask = (1 << compressor->conf.window) - 1; uint8_t symbol = get_last_window_byte(compressor); @@ -329,8 +329,8 @@ static TAMP_NOINLINE void write_rle_token(TampCompressor *compressor, uint8_t co * @param[out] output_written_size Bytes written to output. * @return TAMP_OK on success, TAMP_OUTPUT_FULL if output buffer is too small. */ -static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor *compressor, unsigned char *output, - size_t output_size, size_t *output_written_size) { +static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor* compressor, unsigned char* output, + size_t output_size, size_t* output_written_size) { const uint16_t window_mask = (1 << compressor->conf.window) - 1; const uint8_t count = compressor->extended_match_count; const uint16_t position = compressor->extended_match_position; @@ -373,8 +373,8 @@ static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor *compres } #endif // TAMP_EXTENDED_COMPRESS -TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned char *output, size_t output_size, - size_t *output_written_size) { +TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor* compressor, unsigned char* output, size_t output_size, + size_t* output_written_size) { tamp_res res; // Cache bitfield values for faster access in hot path const uint8_t conf_window = compressor->conf.window; @@ -606,8 +606,8 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor *compressor, unsigned return TAMP_OK; } -void tamp_compressor_sink(TampCompressor *compressor, const unsigned char *input, size_t input_size, - size_t *consumed_size) { +void tamp_compressor_sink(TampCompressor* compressor, const unsigned char* input, size_t 
input_size, + size_t* consumed_size) { size_t consumed_size_proxy; if (TAMP_LIKELY(consumed_size)) *consumed_size = 0; @@ -622,9 +622,9 @@ void tamp_compressor_sink(TampCompressor *compressor, const unsigned char *input } } -tamp_res tamp_compressor_compress_cb(TampCompressor *compressor, unsigned char *output, size_t output_size, - size_t *output_written_size, const unsigned char *input, size_t input_size, - size_t *input_consumed_size, tamp_callback_t callback, void *user_data) { +tamp_res tamp_compressor_compress_cb(TampCompressor* compressor, unsigned char* output, size_t output_size, + size_t* output_written_size, const unsigned char* input, size_t input_size, + size_t* input_consumed_size, tamp_callback_t callback, void* user_data) { tamp_res res; size_t input_consumed_size_proxy = 0, output_written_size_proxy = 0; size_t total_input_size = input_size; @@ -663,8 +663,8 @@ tamp_res tamp_compressor_compress_cb(TampCompressor *compressor, unsigned char * return TAMP_OK; } -tamp_res tamp_compressor_flush(TampCompressor *compressor, unsigned char *output, size_t output_size, - size_t *output_written_size, bool write_token) { +tamp_res tamp_compressor_flush(TampCompressor* compressor, unsigned char* output, size_t output_size, + size_t* output_written_size, bool write_token) { tamp_res res; size_t chunk_output_written_size; size_t output_written_size_proxy; @@ -672,61 +672,52 @@ tamp_res tamp_compressor_flush(TampCompressor *compressor, unsigned char *output if (!output_written_size) output_written_size = &output_written_size_proxy; *output_written_size = 0; - while (compressor->input_size) { - // Compress the remainder of the input buffer. 
+flush_check: + if (TAMP_LIKELY(compressor->input_size)) { res = tamp_compressor_poll(compressor, output, output_size, &chunk_output_written_size); + } +#if TAMP_EXTENDED_COMPRESS + else if (compressor->conf.extended && compressor->rle_count >= 1) { + // Partial flush first to make room + res = partial_flush(compressor, output, output_size, &chunk_output_written_size); (*output_written_size) += chunk_output_written_size; if (TAMP_UNLIKELY(res != TAMP_OK)) return res; output_size -= chunk_output_written_size; output += chunk_output_written_size; - } - -#if TAMP_EXTENDED_COMPRESS - if (compressor->conf.extended) { - // Flush any pending RLE - if (compressor->rle_count >= 1) { - // Partial flush first to make room - res = partial_flush(compressor, output, output_size, &chunk_output_written_size); - (*output_written_size) += chunk_output_written_size; - if (TAMP_UNLIKELY(res != TAMP_OK)) return res; - output_size -= chunk_output_written_size; - output += chunk_output_written_size; - - if (compressor->rle_count == 1) { - // Single byte - write as literal (can't use RLE token for count < 2) - uint8_t literal = get_last_window_byte(compressor); - write_to_bit_buffer(compressor, IS_LITERAL_FLAG | literal, compressor->conf.literal + 1); - - // Write to window - const uint16_t window_mask = (1 << compressor->conf.window) - 1; - compressor->window[compressor->window_pos] = literal; - compressor->window_pos = (compressor->window_pos + 1) & window_mask; - } else { - // count >= 2: write as RLE token - write_rle_token(compressor, compressor->rle_count); - } - compressor->rle_count = 0; - // Partial flush again after writing token - res = partial_flush(compressor, output, output_size, &chunk_output_written_size); - (*output_written_size) += chunk_output_written_size; - if (TAMP_UNLIKELY(res != TAMP_OK)) return res; - output_size -= chunk_output_written_size; - output += chunk_output_written_size; - } + if (compressor->rle_count == 1) { + // Single byte - write as literal (can't 
use RLE token for count < 2) + uint8_t literal = get_last_window_byte(compressor); + write_to_bit_buffer(compressor, IS_LITERAL_FLAG | literal, compressor->conf.literal + 1); - // Flush any pending extended match - else if (compressor->extended_match_count) { - // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) - if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; - res = write_extended_match_token(compressor, output, output_size, &chunk_output_written_size); - (*output_written_size) += chunk_output_written_size; - if (TAMP_UNLIKELY(res != TAMP_OK)) return res; - output_size -= chunk_output_written_size; - output += chunk_output_written_size; + // Write to window + const uint16_t window_mask = (1 << compressor->conf.window) - 1; + compressor->window[compressor->window_pos] = literal; + compressor->window_pos = (compressor->window_pos + 1) & window_mask; + } else { + // count >= 2: write as RLE token + write_rle_token(compressor, compressor->rle_count); } + compressor->rle_count = 0; + + // Partial flush again after writing token + res = partial_flush(compressor, output, output_size, &chunk_output_written_size); + } else if (compressor->conf.extended && compressor->extended_match_count) { + // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) + if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; + res = write_extended_match_token(compressor, output, output_size, &chunk_output_written_size); } #endif // TAMP_EXTENDED_COMPRESS + else { + goto flush_done; + } + (*output_written_size) += chunk_output_written_size; + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + output_size -= chunk_output_written_size; + output += chunk_output_written_size; + goto flush_check; + +flush_done: // Perform partial flush to see if we need a FLUSH token (check if output buffer in not empty), // and to subsequently make room for the FLUSH token. 
@@ -760,10 +751,10 @@ tamp_res tamp_compressor_flush(TampCompressor *compressor, unsigned char *output return TAMP_OK; } -tamp_res tamp_compressor_compress_and_flush_cb(TampCompressor *compressor, unsigned char *output, size_t output_size, - size_t *output_written_size, const unsigned char *input, - size_t input_size, size_t *input_consumed_size, bool write_token, - tamp_callback_t callback, void *user_data) { +tamp_res tamp_compressor_compress_and_flush_cb(TampCompressor* compressor, unsigned char* output, size_t output_size, + size_t* output_written_size, const unsigned char* input, + size_t input_size, size_t* input_consumed_size, bool write_token, + tamp_callback_t callback, void* user_data) { tamp_res res; size_t flush_size; size_t output_written_size_proxy; @@ -786,9 +777,9 @@ tamp_res tamp_compressor_compress_and_flush_cb(TampCompressor *compressor, unsig #if TAMP_STREAM -tamp_res tamp_compress_stream(TampCompressor *compressor, tamp_read_t read_cb, void *read_handle, tamp_write_t write_cb, - void *write_handle, size_t *input_consumed_size, size_t *output_written_size, - tamp_callback_t callback, void *user_data) { +tamp_res tamp_compress_stream(TampCompressor* compressor, tamp_read_t read_cb, void* read_handle, tamp_write_t write_cb, + void* write_handle, size_t* input_consumed_size, size_t* output_written_size, + tamp_callback_t callback, void* user_data) { size_t input_consumed_size_proxy, output_written_size_proxy; if (!input_consumed_size) input_consumed_size = &input_consumed_size_proxy; if (!output_written_size) output_written_size = &output_written_size_proxy; From 8e710df60a11598dcd156356f828dbbd848f1226 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 14:10:16 -0500 Subject: [PATCH 086/109] save 12 more bytes --- tamp/_c_src/tamp/compressor.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 800d6f54..613a253c 100644 --- 
a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -699,9 +699,7 @@ tamp_res tamp_compressor_flush(TampCompressor* compressor, unsigned char* output write_rle_token(compressor, compressor->rle_count); } compressor->rle_count = 0; - - // Partial flush again after writing token - res = partial_flush(compressor, output, output_size, &chunk_output_written_size); + chunk_output_written_size = 0; // Already handled above; flush_done will flush token bits } else if (compressor->conf.extended && compressor->extended_match_count) { // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; From e55710d15675fb34ef8fd198f44c0df51772442d Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 14:14:35 -0500 Subject: [PATCH 087/109] save 44 more bytes --- tamp/_c_src/tamp/compressor.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 613a253c..0de353d5 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -673,18 +673,18 @@ tamp_res tamp_compressor_flush(TampCompressor* compressor, unsigned char* output *output_written_size = 0; flush_check: + // Flush pending bits before checking for more work + res = partial_flush(compressor, output, output_size, &chunk_output_written_size); + (*output_written_size) += chunk_output_written_size; + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + output_size -= chunk_output_written_size; + output += chunk_output_written_size; + if (TAMP_LIKELY(compressor->input_size)) { res = tamp_compressor_poll(compressor, output, output_size, &chunk_output_written_size); } #if TAMP_EXTENDED_COMPRESS else if (compressor->conf.extended && compressor->rle_count >= 1) { - // Partial flush first to make room - res = partial_flush(compressor, output, output_size, 
&chunk_output_written_size); - (*output_written_size) += chunk_output_written_size; - if (TAMP_UNLIKELY(res != TAMP_OK)) return res; - output_size -= chunk_output_written_size; - output += chunk_output_written_size; - if (compressor->rle_count == 1) { // Single byte - write as literal (can't use RLE token for count < 2) uint8_t literal = get_last_window_byte(compressor); @@ -699,7 +699,7 @@ tamp_res tamp_compressor_flush(TampCompressor* compressor, unsigned char* output write_rle_token(compressor, compressor->rle_count); } compressor->rle_count = 0; - chunk_output_written_size = 0; // Already handled above; flush_done will flush token bits + chunk_output_written_size = 0; } else if (compressor->conf.extended && compressor->extended_match_count) { // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; @@ -717,14 +717,6 @@ tamp_res tamp_compressor_flush(TampCompressor* compressor, unsigned char* output flush_done: - // Perform partial flush to see if we need a FLUSH token (check if output buffer in not empty), - // and to subsequently make room for the FLUSH token. 
- res = partial_flush(compressor, output, output_size, &chunk_output_written_size); - output_size -= chunk_output_written_size; - (*output_written_size) += chunk_output_written_size; - output += chunk_output_written_size; - if (TAMP_UNLIKELY(res != TAMP_OK)) return res; - // Check if there's enough output buffer space if (compressor->bit_buffer_pos) { if (output_size == 0) { From 65f9812077bcac0173367b3155ee47d3746ca04b Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 14:33:13 -0500 Subject: [PATCH 088/109] cleanup flush_done --- tamp/_c_src/tamp/compressor.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 0de353d5..d86a30a7 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -716,22 +716,25 @@ tamp_res tamp_compressor_flush(TampCompressor* compressor, unsigned char* output goto flush_check; flush_done: - - // Check if there's enough output buffer space - if (compressor->bit_buffer_pos) { - if (output_size == 0) { - return TAMP_OUTPUT_FULL; - } - if (write_token) { - if (output_size < 2) return TAMP_OUTPUT_FULL; - write_to_bit_buffer(compressor, FLUSH_CODE, 9); - } + // At this point, up to 7 bits may remain in the compressor->bit_buffer + // The output buffer may have 0 bytes remaining. + if (write_token && compressor->bit_buffer_pos) { + // We don't want to write the FLUSH token to the bit_buffer unless + // we are confident that it'll wind up in the output buffer + // in THIS function call. + // Otherwise, if we wind up with a TAMP_OUTPUT_FULL result, we could + // end up accidentally writing multiple FLUSH tokens. + if (TAMP_UNLIKELY(output_size < 2)) return TAMP_OUTPUT_FULL; + write_to_bit_buffer(compressor, FLUSH_CODE, 9); } + // At this point, up to 16 bits may remain in the compressor->bit_buffer + // The output buffer may have 0 bytes remaining. 
+ // Flush the remainder of the output bit-buffer while (compressor->bit_buffer_pos) { - *output = compressor->bit_buffer >> 24; - output++; + if (TAMP_UNLIKELY(output_size == 0)) return TAMP_OUTPUT_FULL; + *output++ = compressor->bit_buffer >> 24; compressor->bit_buffer <<= 8; compressor->bit_buffer_pos -= MIN(compressor->bit_buffer_pos, 8); output_size--; From 9aefc6e1df2dcb7ab8ae5e2b61f0925e11ee0149 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 14:58:56 -0500 Subject: [PATCH 089/109] update expected javascript hash --- .github/workflows/javascript.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/javascript.yaml b/.github/workflows/javascript.yaml index 00fdd5be..4dc0fa73 100644 --- a/.github/workflows/javascript.yaml +++ b/.github/workflows/javascript.yaml @@ -99,9 +99,9 @@ jobs: cd build HASH=$(sha256sum enwik8-js.tamp | cut -d' ' -f1) echo "Compression hash: $HASH" - if [ "$HASH" != "02e05af059a0040d641988075cf1dfc479a084f9a34b5c8a348354211c5fa038" ]; then + if [ "$HASH" != "dd5e431b0cbaa6ee001b10493c2b08e6235f42e3f4ce00958e88b1b97581872e" ]; then echo "❌ Hash mismatch!" 
- echo "Expected: 02e05af059a0040d641988075cf1dfc479a084f9a34b5c8a348354211c5fa038" + echo "Expected: dd5e431b0cbaa6ee001b10493c2b08e6235f42e3f4ce00958e88b1b97581872e" echo "Got: $HASH" exit 1 fi From f47b54740d44a4c0bd38ac175bb6d8f8cd6f3dd6 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 15:09:16 -0500 Subject: [PATCH 090/109] consolidate extended compression functions --- tamp/_c_src/tamp/compressor.c | 167 +++++++++++++++++----------------- 1 file changed, 81 insertions(+), 86 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index d86a30a7..d3e3f40f 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -45,25 +45,6 @@ static TAMP_NOINLINE void write_to_bit_buffer(TampCompressor* compressor, uint32 compressor->bit_buffer |= bits << (32 - compressor->bit_buffer_pos); } -#if TAMP_EXTENDED_COMPRESS -/** - * @brief Write extended huffman encoding (huffman + trailing bits). - * - * Used for both RLE count and extended match size encoding. - * - * @param[in,out] compressor Compressor with bit buffer. - * @param[in] value The value to encode. - * @param[in] trailing_bits Number of trailing bits (3 for extended match, 4 for RLE). - */ -static TAMP_NOINLINE void write_extended_huffman(TampCompressor* compressor, uint8_t value, uint8_t trailing_bits) { - uint8_t code_index = value >> trailing_bits; - // Write huffman code (without literal flag) + trailing bits in one call - write_to_bit_buffer(compressor, (huffman_codes[code_index] << trailing_bits) | (value & ((1 << trailing_bits) - 1)), - (huffman_bits[code_index] - 1) + trailing_bits); -} - -#endif // TAMP_EXTENDED_COMPRESS - /** * @brief Partially flush the internal bit buffer. * @@ -156,7 +137,88 @@ static TAMP_NOINLINE void find_best_match(TampCompressor* compressor, uint16_t* #endif +#if TAMP_LAZY_MATCHING +/** + * @brief Check if writing a single byte will overlap with a future match section. 
+ * + * @param[in] write_pos Position where the single byte will be written. + * @param[in] match_index Index in window where the match starts. + * @param[in] match_size Size of the match to validate. + * @return true if no overlap (match is safe), false if there's overlap. + */ +static inline bool validate_no_match_overlap(uint16_t write_pos, uint16_t match_index, uint8_t match_size) { + // Check if write position falls within the match range [match_index, match_index + match_size - 1] + return write_pos < match_index || write_pos >= match_index + match_size; +} +#endif + +tamp_res tamp_compressor_init(TampCompressor* compressor, const TampConf* conf, unsigned char* window) { + const TampConf conf_default = { + .window = 10, + .literal = 8, + .use_custom_dictionary = false, +#if TAMP_LAZY_MATCHING + .lazy_matching = false, +#endif +#if TAMP_EXTENDED_COMPRESS + .extended = true, // Default to extended format +#endif + }; + if (!conf) conf = &conf_default; + if (conf->window < 8 || conf->window > 15) return TAMP_INVALID_CONF; + if (conf->literal < 5 || conf->literal > 8) return TAMP_INVALID_CONF; +#if !TAMP_EXTENDED_COMPRESS + if (conf->extended) return TAMP_INVALID_CONF; // Extended requested but not compiled in +#endif + + for (uint8_t i = 0; i < sizeof(TampCompressor); i++) // Zero-out the struct + ((unsigned char*)compressor)[i] = 0; + + // Build header directly from conf (8 bits total) + // Layout: [window:3][literal:2][use_custom_dictionary:1][extended:1][more_headers:1] + uint8_t header = ((conf->window - 8) << 5) | ((conf->literal - 5) << 3) | (conf->use_custom_dictionary << 2) | + (conf->extended << 1); + + compressor->conf = *conf; // Single struct copy + compressor->window = window; + compressor->min_pattern_size = tamp_compute_min_pattern_size(conf->window, conf->literal); + +#if TAMP_LAZY_MATCHING + compressor->cached_match_index = -1; // Initialize cache as invalid +#endif + + if (!conf->use_custom_dictionary) tamp_initialize_dictionary(window, (1 << 
conf->window)); + + write_to_bit_buffer(compressor, header, 8); + + return TAMP_OK; +} + #if TAMP_EXTENDED_COMPRESS +/** + * @brief Write extended huffman encoding (huffman + trailing bits). + * + * Used for both RLE count and extended match size encoding. + * + * @param[in,out] compressor Compressor with bit buffer. + * @param[in] value The value to encode. + * @param[in] trailing_bits Number of trailing bits (3 for extended match, 4 for RLE). + */ +static TAMP_NOINLINE void write_extended_huffman(TampCompressor* compressor, uint8_t value, uint8_t trailing_bits) { + uint8_t code_index = value >> trailing_bits; + // Write huffman code (without literal flag) + trailing bits in one call + write_to_bit_buffer(compressor, (huffman_codes[code_index] << trailing_bits) | (value & ((1 << trailing_bits) - 1)), + (huffman_bits[code_index] - 1) + trailing_bits); +} + +/** + * @brief Get the last byte written to the window. + */ +static inline uint8_t get_last_window_byte(TampCompressor* compressor) { + uint16_t prev_pos = (compressor->window_pos - 1) & ((1 << compressor->conf.window) - 1); + return compressor->window[prev_pos]; +} + /** * @brief Search for extended match continuation using implicit pattern comparison. * @@ -224,73 +286,6 @@ static inline void find_extended_match(TampCompressor* compressor, uint16_t curr } } } -#endif // TAMP_EXTENDED_COMPRESS - -#if TAMP_LAZY_MATCHING -/** - * @brief Check if writing a single byte will overlap with a future match section. - * - * @param[in] write_pos Position where the single byte will be written. - * @param[in] match_index Index in window where the match starts. - * @param[in] match_size Size of the match to validate. - * @return true if no overlap (match is safe), false if there's overlap. 
- */ -static inline bool validate_no_match_overlap(uint16_t write_pos, uint16_t match_index, uint8_t match_size) { - // Check if write position falls within the match range [match_index, match_index + match_size - 1] - return write_pos < match_index || write_pos >= match_index + match_size; -} -#endif - -tamp_res tamp_compressor_init(TampCompressor* compressor, const TampConf* conf, unsigned char* window) { - const TampConf conf_default = { - .window = 10, - .literal = 8, - .use_custom_dictionary = false, -#if TAMP_LAZY_MATCHING - .lazy_matching = false, -#endif -#if TAMP_EXTENDED_COMPRESS - .extended = true, // Default to extended format -#endif - }; - if (!conf) conf = &conf_default; - if (conf->window < 8 || conf->window > 15) return TAMP_INVALID_CONF; - if (conf->literal < 5 || conf->literal > 8) return TAMP_INVALID_CONF; -#if !TAMP_EXTENDED_COMPRESS - if (conf->extended) return TAMP_INVALID_CONF; // Extended requested but not compiled in -#endif - - for (uint8_t i = 0; i < sizeof(TampCompressor); i++) // Zero-out the struct - ((unsigned char*)compressor)[i] = 0; - - // Build header directly from conf (8 bits total) - // Layout: [window:3][literal:2][use_custom_dictionary:1][extended:1][more_headers:1] - uint8_t header = ((conf->window - 8) << 5) | ((conf->literal - 5) << 3) | (conf->use_custom_dictionary << 2) | - (conf->extended << 1); - - compressor->conf = *conf; // Single struct copy - compressor->window = window; - compressor->min_pattern_size = tamp_compute_min_pattern_size(conf->window, conf->literal); - -#if TAMP_LAZY_MATCHING - compressor->cached_match_index = -1; // Initialize cache as invalid -#endif - - if (!conf->use_custom_dictionary) tamp_initialize_dictionary(window, (1 << conf->window)); - - write_to_bit_buffer(compressor, header, 8); - - return TAMP_OK; -} - -#if TAMP_EXTENDED_COMPRESS -/** - * @brief Get the last byte written to the window. 
- */ -static inline uint8_t get_last_window_byte(TampCompressor* compressor) { - uint16_t prev_pos = (compressor->window_pos - 1) & ((1 << compressor->conf.window) - 1); - return compressor->window[prev_pos]; -} /** * @brief Write RLE token to bit buffer and update window. From ea38c62dbdfde8146d957b0b0b8bb7f1373fa3ba Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 15:23:17 -0500 Subject: [PATCH 091/109] simplify find_extended_match, deduplicate checks from caller --- tamp/_c_src/tamp/compressor.c | 44 +++++++++++------------------------ 1 file changed, 13 insertions(+), 31 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index d3e3f40f..5511c215 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -234,51 +234,33 @@ static inline uint8_t get_last_window_byte(TampCompressor* compressor) { */ static inline void find_extended_match(TampCompressor* compressor, uint16_t current_pos, uint8_t current_count, uint16_t* new_pos, uint8_t* new_count) { + // Preconditions (guaranteed by caller): + // - input_size > 0 + // - current_pos + current_count < WINDOW_SIZE + // - current_count < MAX_PATTERN_SIZE *new_count = 0; const unsigned char* window = compressor->window; const uint16_t window_size = WINDOW_SIZE; const uint8_t max_pattern = MIN(current_count + compressor->input_size, MAX_PATTERN_SIZE); - - // Need at least current_count + 1 to find a longer match, and room in window - if (max_pattern <= current_count) return; - if (current_pos + current_count + 1 > window_size) return; - - // First two bytes of pattern (from window at current_pos) - const uint8_t first_byte = window[current_pos]; - const uint8_t second_byte = window[current_pos + 1]; - - // The target byte to extend by (input[0], like Python does) const uint8_t extend_byte = read_input(0); - // Search candidates that can fit at least current_count + 1 bytes for (uint16_t cand = current_pos; cand + current_count + 1 <= window_size; 
cand++) { - // Quick 2-byte check - if (TAMP_LIKELY(window[cand] != first_byte)) continue; - if (TAMP_LIKELY(window[cand + 1] != second_byte)) continue; - - // Check if all current_count bytes match - bool full_match = true; - for (uint8_t i = 2; i < current_count; i++) { - if (window[cand + i] != window[current_pos + i]) { - full_match = false; - break; - } - } - if (!full_match) continue; - - // Check if the extension byte matches + // Check extension byte first (most discriminating) if (window[cand + current_count] != extend_byte) continue; - // Found a match of current_count + 1 bytes - now extend as far as possible + // Check if current_count bytes match (at cand==current_pos, compares with self) + uint8_t i = 0; + while (i < current_count && window[cand + i] == window[current_pos + i]) i++; + if (i < current_count) continue; + + // Found a match - extend as far as possible const uint8_t cand_max = MIN(max_pattern, window_size - cand); uint8_t match_len = current_count + 1; - for (uint8_t i = current_count + 1; i < cand_max; i++) { - uint8_t target = read_input(i - current_count); - if (window[cand + i] != target) break; + for (i = current_count + 1; i < cand_max; i++) { + if (window[cand + i] != read_input(i - current_count)) break; match_len = i + 1; } - // Track this match (guaranteed > current_count) if (match_len > *new_count) { *new_count = match_len; *new_pos = cand; From 178bf19f0199c53a1dacacd3dcc33ca49bcb5bf2 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 15:28:59 -0500 Subject: [PATCH 092/109] don't need to reset extended_match_position. 
--- tamp/_c_src/tamp/compressor.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 5511c215..5dc4fd86 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -342,9 +342,7 @@ static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor* compres uint8_t window_write = MIN(count, remaining); tamp_window_copy(compressor->window, &compressor->window_pos, position, window_write, window_mask); - // Reset extended match state - compressor->extended_match_count = 0; - compressor->extended_match_position = 0; + compressor->extended_match_count = 0; // Position reset not needed - only read when count > 0 return TAMP_OK; } From 330e5f4c85139964746630a17b9d31b883c79b53 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 15:52:01 -0500 Subject: [PATCH 093/109] combine if-statements --- tamp/_c_src/tamp/compressor.c | 94 +++++++++++++++++------------------ 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 5dc4fd86..f5787d68 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -381,28 +381,54 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor* compressor, unsigned uint16_t match_index = 0; #if TAMP_EXTENDED_COMPRESS - // Extended: Handle extended match continuation - if (TAMP_UNLIKELY(conf_extended && compressor->extended_match_count)) { - // We're in extended match mode - try to extend the match - const uint8_t max_ext_match = compressor->min_pattern_size + 11 + EXTENDED_MATCH_MAX_EXTRA; + if (TAMP_UNLIKELY(conf_extended)) { + // Handle extended match continuation + if (compressor->extended_match_count) { + // We're in extended match mode - try to extend the match + const uint8_t max_ext_match = compressor->min_pattern_size + 11 + EXTENDED_MATCH_MAX_EXTRA; + + while (compressor->input_size > 0) { + const uint16_t current_pos 
= compressor->extended_match_position; + const uint8_t current_count = compressor->extended_match_count; + + // Check if extending would go beyond window buffer boundary (no wrap-around) + if (current_pos + current_count >= WINDOW_SIZE) { + // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) + if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; + size_t token_bytes; + res = write_extended_match_token(compressor, output, output_size, &token_bytes); + (*output_written_size) += token_bytes; + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + return TAMP_OK; + } - while (compressor->input_size > 0) { - const uint16_t current_pos = compressor->extended_match_position; - const uint8_t current_count = compressor->extended_match_count; + // Check if we've reached max extended match size + if (current_count >= max_ext_match) { + // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) + if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; + size_t token_bytes; + res = write_extended_match_token(compressor, output, output_size, &token_bytes); + (*output_written_size) += token_bytes; + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + return TAMP_OK; + } - // Check if extending would go beyond window buffer boundary (no wrap-around) - if (current_pos + current_count >= WINDOW_SIZE) { - // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) - if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; - size_t token_bytes; - res = write_extended_match_token(compressor, output, output_size, &token_bytes); - (*output_written_size) += token_bytes; - if (TAMP_UNLIKELY(res != TAMP_OK)) return res; - return TAMP_OK; - } + // Search for longer match (includes O(1) extension at same position) + uint16_t new_pos = 0; + uint8_t new_count; + find_extended_match(compressor, current_pos, 
current_count, &new_pos, &new_count); + + if (new_count > current_count) { + // Found longer match - update and continue + uint8_t extra_bytes = new_count - current_count; + compressor->extended_match_position = new_pos; + compressor->extended_match_count = new_count; + compressor->input_pos = input_add(extra_bytes); + compressor->input_size -= extra_bytes; + continue; + } - // Check if we've reached max extended match size - if (current_count >= max_ext_match) { + // No longer match found - emit current match // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; size_t token_bytes; @@ -411,37 +437,11 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor* compressor, unsigned if (TAMP_UNLIKELY(res != TAMP_OK)) return res; return TAMP_OK; } - - // Search for longer match (includes O(1) extension at same position) - uint16_t new_pos = 0; - uint8_t new_count; - find_extended_match(compressor, current_pos, current_count, &new_pos, &new_count); - - if (new_count > current_count) { - // Found longer match - update and continue - uint8_t extra_bytes = new_count - current_count; - compressor->extended_match_position = new_pos; - compressor->extended_match_count = new_count; - compressor->input_pos = input_add(extra_bytes); - compressor->input_size -= extra_bytes; - continue; - } - - // No longer match found - emit current match - // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) - if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; - size_t token_bytes; - res = write_extended_match_token(compressor, output, output_size, &token_bytes); - (*output_written_size) += token_bytes; - if (TAMP_UNLIKELY(res != TAMP_OK)) return res; + // Ran out of input while extending - return and wait for more return TAMP_OK; } - // Ran out of input while extending - return and wait for more - 
- return TAMP_OK; - } - // Extended: Handle RLE accumulation with persistent state - if (TAMP_UNLIKELY(conf_extended)) { + // Handle RLE accumulation with persistent state uint8_t last_byte = get_last_window_byte(compressor); // Count RLE bytes in current buffer WITHOUT consuming yet From 83d3d8ed09c7f08f64c8262c9afc5ebb9e0adafd Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 16:13:00 -0500 Subject: [PATCH 094/109] consolidate write_extended_match_token output size checks. --- tamp/_c_src/tamp/compressor.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index f5787d68..c74da71c 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -308,14 +308,17 @@ static TAMP_NOINLINE void write_rle_token(TampCompressor* compressor, uint8_t co */ static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor* compressor, unsigned char* output, size_t output_size, size_t* output_written_size) { + *output_written_size = 0; + + // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) + if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; + const uint16_t window_mask = (1 << compressor->conf.window) - 1; const uint8_t count = compressor->extended_match_count; const uint16_t position = compressor->extended_match_position; tamp_res res; size_t flush_bytes; - *output_written_size = 0; - // Write symbol (7 bits) + extended huffman (up to 11 bits) = 18 bits max // With ≤7 bits already in buffer, total ≤25 bits - fits in 32-bit buffer write_to_bit_buffer(compressor, huffman_codes[TAMP_EXTENDED_MATCH_SYMBOL], @@ -393,8 +396,6 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor* compressor, unsigned // Check if extending would go beyond window buffer boundary (no wrap-around) if (current_pos + current_count >= WINDOW_SIZE) { - // Pre-check output space to prevent
OUTPUT_FULL mid-token (would corrupt bit_buffer) - if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; size_t token_bytes; res = write_extended_match_token(compressor, output, output_size, &token_bytes); (*output_written_size) += token_bytes; @@ -404,8 +405,6 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor* compressor, unsigned // Check if we've reached max extended match size if (current_count >= max_ext_match) { - // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) - if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; size_t token_bytes; res = write_extended_match_token(compressor, output, output_size, &token_bytes); (*output_written_size) += token_bytes; @@ -429,8 +428,6 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor* compressor, unsigned } // No longer match found - emit current match - // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) - if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; size_t token_bytes; res = write_extended_match_token(compressor, output, output_size, &token_bytes); (*output_written_size) += token_bytes; @@ -676,8 +673,6 @@ tamp_res tamp_compressor_flush(TampCompressor* compressor, unsigned char* output compressor->rle_count = 0; chunk_output_written_size = 0; } else if (compressor->conf.extended && compressor->extended_match_count) { - // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) - if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; res = write_extended_match_token(compressor, output, output_size, &chunk_output_written_size); } #endif // TAMP_EXTENDED_COMPRESS From d0c2b36001fd27ad984defa13997802a3e2d67e6 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 16:35:57 -0500 Subject: [PATCH 095/109] consolidate output arithmetic to
partial_flush. --- tamp/_c_src/tamp/compressor.c | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index c74da71c..5e471b0f 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -50,12 +50,15 @@ static TAMP_NOINLINE void write_to_bit_buffer(TampCompressor* compressor, uint32 * * Flushes complete bytes from the bit buffer. Up to 7 bits may remain. */ -static TAMP_NOINLINE tamp_res partial_flush(TampCompressor* compressor, unsigned char* output, size_t output_size, +static TAMP_NOINLINE tamp_res partial_flush(TampCompressor* compressor, unsigned char** output, size_t* output_size, size_t* output_written_size) { - for (*output_written_size = output_size; compressor->bit_buffer_pos >= 8 && output_size; - output_size--, compressor->bit_buffer_pos -= 8, compressor->bit_buffer <<= 8) - *output++ = compressor->bit_buffer >> 24; - *output_written_size -= output_size; + while (compressor->bit_buffer_pos >= 8 && *output_size) { + *(*output)++ = compressor->bit_buffer >> 24; + (*output_size)--; + (*output_written_size)++; + compressor->bit_buffer_pos -= 8; + compressor->bit_buffer <<= 8; + } return (compressor->bit_buffer_pos >= 8) ? 
TAMP_OUTPUT_FULL : TAMP_OK; } @@ -317,7 +320,6 @@ static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor* compres const uint8_t count = compressor->extended_match_count; const uint16_t position = compressor->extended_match_position; tamp_res res; - size_t flush_bytes; // Write symbol (7 bits) + extended huffman (up to 11 bits) = 18 bits max // With ≤7 bits already in buffer, total ≤25 bits - fits in 32-bit buffer @@ -326,18 +328,14 @@ static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor* compres write_extended_huffman(compressor, count - compressor->min_pattern_size - 11 - 1, TAMP_LEADING_EXTENDED_MATCH_BITS); // Flush to make room for window position (up to 15 bits) - res = partial_flush(compressor, output, output_size, &flush_bytes); - *output_written_size += flush_bytes; - output += flush_bytes; - output_size -= flush_bytes; + res = partial_flush(compressor, &output, &output_size, output_written_size); if (TAMP_UNLIKELY(res != TAMP_OK)) return res; // Write window position - with ≤7 bits remaining, up to 22 bits total - fits write_to_bit_buffer(compressor, position, compressor->conf.window); // Final flush - res = partial_flush(compressor, output, output_size, &flush_bytes); - *output_written_size += flush_bytes; + res = partial_flush(compressor, &output, &output_size, output_written_size); if (TAMP_UNLIKELY(res != TAMP_OK)) return res; // Write to window (up to end of buffer, no wrap) @@ -370,12 +368,8 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor* compressor, unsigned { // Make sure there's enough room in the bit buffer. 
- size_t flush_bytes_written; - res = partial_flush(compressor, output, output_size, &flush_bytes_written); - (*output_written_size) += flush_bytes_written; + res = partial_flush(compressor, &output, &output_size, output_written_size); if (TAMP_UNLIKELY(res != TAMP_OK)) return res; - output_size -= flush_bytes_written; - output += flush_bytes_written; // cppcheck-suppress unreadVariable } if (TAMP_UNLIKELY(output_size == 0)) return TAMP_OUTPUT_FULL; @@ -413,7 +407,7 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor* compressor, unsigned } // Search for longer match (includes O(1) extension at same position) - uint16_t new_pos = 0; + uint16_t new_pos; uint8_t new_count; find_extended_match(compressor, current_pos, current_count, &new_pos, &new_count); @@ -646,11 +640,8 @@ tamp_res tamp_compressor_flush(TampCompressor* compressor, unsigned char* output flush_check: // Flush pending bits before checking for more work - res = partial_flush(compressor, output, output_size, &chunk_output_written_size); - (*output_written_size) += chunk_output_written_size; + res = partial_flush(compressor, &output, &output_size, output_written_size); if (TAMP_UNLIKELY(res != TAMP_OK)) return res; - output_size -= chunk_output_written_size; - output += chunk_output_written_size; if (TAMP_LIKELY(compressor->input_size)) { res = tamp_compressor_poll(compressor, output, output_size, &chunk_output_written_size); From 7abf720fd4da81b066755f8011f3ee25f25dd441 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 16:43:57 -0500 Subject: [PATCH 096/109] consolidate output arithmetic to write_extended_match_token.
--- tamp/_c_src/tamp/compressor.c | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 5e471b0f..658a7157 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -304,17 +304,15 @@ static TAMP_NOINLINE void write_rle_token(TampCompressor* compressor, uint8_t co * Total: up to 33 bits. We flush after symbol+huffman (18 bits max) to ensure window_pos fits. * * @param[in,out] compressor Compressor state. - * @param[out] output Output buffer for flushed bytes. - * @param[in] output_size Available space in output buffer. - * @param[out] output_written_size Bytes written to output. + * @param[in,out] output Output buffer pointer (updated on return). + * @param[in,out] output_size Available space (updated on return). + * @param[in,out] output_written_size Bytes written (accumulated). * @return TAMP_OK on success, TAMP_OUTPUT_FULL if output buffer is too small. 
*/ -static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor* compressor, unsigned char* output, - size_t output_size, size_t* output_written_size) { - *output_written_size = 0; - +static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor* compressor, unsigned char** output, + size_t* output_size, size_t* output_written_size) { // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) - if (TAMP_UNLIKELY(output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; + if (TAMP_UNLIKELY(*output_size < EXTENDED_MATCH_MIN_OUTPUT_BYTES)) return TAMP_OUTPUT_FULL; const uint16_t window_mask = (1 << compressor->conf.window) - 1; const uint8_t count = compressor->extended_match_count; @@ -328,14 +326,14 @@ static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor* compres write_extended_huffman(compressor, count - compressor->min_pattern_size - 11 - 1, TAMP_LEADING_EXTENDED_MATCH_BITS); // Flush to make room for window position (up to 15 bits) - res = partial_flush(compressor, &output, &output_size, output_written_size); + res = partial_flush(compressor, output, output_size, output_written_size); if (TAMP_UNLIKELY(res != TAMP_OK)) return res; // Write window position - with ≤7 bits remaining, up to 22 bits total - fits write_to_bit_buffer(compressor, position, compressor->conf.window); // Final flush - res = partial_flush(compressor, &output, &output_size, output_written_size); + res = partial_flush(compressor, output, output_size, output_written_size); if (TAMP_UNLIKELY(res != TAMP_OK)) return res; // Write to window (up to end of buffer, no wrap) @@ -390,18 +388,14 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor* compressor, unsigned // Check if extending would go beyond window buffer boundary (no wrap-around) if (current_pos + current_count >= WINDOW_SIZE) { - size_t token_bytes; - res = write_extended_match_token(compressor, output, output_size, &token_bytes); - 
(*output_written_size) += token_bytes; + res = write_extended_match_token(compressor, &output, &output_size, output_written_size); if (TAMP_UNLIKELY(res != TAMP_OK)) return res; return TAMP_OK; } // Check if we've reached max extended match size if (current_count >= max_ext_match) { - size_t token_bytes; - res = write_extended_match_token(compressor, output, output_size, &token_bytes); - (*output_written_size) += token_bytes; + res = write_extended_match_token(compressor, &output, &output_size, output_written_size); if (TAMP_UNLIKELY(res != TAMP_OK)) return res; return TAMP_OK; } @@ -422,9 +416,7 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor* compressor, unsigned } // No longer match found - emit current match - size_t token_bytes; - res = write_extended_match_token(compressor, output, output_size, &token_bytes); - (*output_written_size) += token_bytes; + res = write_extended_match_token(compressor, &output, &output_size, output_written_size); if (TAMP_UNLIKELY(res != TAMP_OK)) return res; return TAMP_OK; } @@ -640,6 +632,7 @@ tamp_res tamp_compressor_flush(TampCompressor* compressor, unsigned char* output flush_check: // Flush pending bits before checking for more work + chunk_output_written_size = 0; res = partial_flush(compressor, &output, &output_size, output_written_size); if (TAMP_UNLIKELY(res != TAMP_OK)) return res; @@ -662,9 +655,8 @@ tamp_res tamp_compressor_flush(TampCompressor* compressor, unsigned char* output write_rle_token(compressor, compressor->rle_count); } compressor->rle_count = 0; - chunk_output_written_size = 0; } else if (compressor->conf.extended && compressor->extended_match_count) { - res = write_extended_match_token(compressor, output, output_size, &chunk_output_written_size); + res = write_extended_match_token(compressor, &output, &output_size, output_written_size); } #endif // TAMP_EXTENDED_COMPRESS else { From 750b00287d16c20b29fba93fec400527010b4d16 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 
16:44:52 -0500 Subject: [PATCH 097/109] update docstrings --- tamp/_c_src/tamp/compressor.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 658a7157..6779a67f 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -49,6 +49,12 @@ static TAMP_NOINLINE void write_to_bit_buffer(TampCompressor* compressor, uint32 * @brief Partially flush the internal bit buffer. * * Flushes complete bytes from the bit buffer. Up to 7 bits may remain. + * + * @param[in,out] compressor Compressor state. + * @param[in,out] output Output buffer pointer (updated on return). + * @param[in,out] output_size Available space (updated on return). + * @param[in,out] output_written_size Bytes written (accumulated). + * @return TAMP_OK on success, TAMP_OUTPUT_FULL if output buffer is too small. */ static TAMP_NOINLINE tamp_res partial_flush(TampCompressor* compressor, unsigned char** output, size_t* output_size, size_t* output_written_size) { From f68aa6ff4210372fd678952d0e75dd57ccaf1104 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 17:00:52 -0500 Subject: [PATCH 098/109] get rid of useless brackets --- tamp/_c_src/tamp/compressor.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 6779a67f..6181c5bd 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -370,11 +370,9 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor* compressor, unsigned if (TAMP_UNLIKELY(compressor->input_size == 0)) return TAMP_OK; - { - // Make sure there's enough room in the bit buffer. - res = partial_flush(compressor, &output, &output_size, output_written_size); - if (TAMP_UNLIKELY(res != TAMP_OK)) return res; - } + // Make sure there's enough room in the bit buffer. 
+ res = partial_flush(compressor, &output, &output_size, output_written_size); + if (TAMP_UNLIKELY(res != TAMP_OK)) return res; if (TAMP_UNLIKELY(output_size == 0)) return TAMP_OUTPUT_FULL; From f8b1bfc990c2d1eef2be89b7cb2208587bff2a39 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 19:49:14 -0500 Subject: [PATCH 099/109] update CLAUDE.md --- CLAUDE.md | 111 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 93 insertions(+), 18 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 1dd02c31..5ba7e8cd 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -26,9 +26,13 @@ different platforms: **Shared C Source:** All implementations use the same C source code in `tamp/_c_src/tamp/`: -- `common.h/c` - Shared utilities and data structures -- `compressor.h/c` - Compression implementation +- `common.h/c` - Shared utilities, data structures, stream I/O callbacks, and + dictionary initialization +- `compressor.h/c` - Compression implementation (sink/poll low-level API and + higher-level compress/flush API) - `decompressor.h/c` - Decompression implementation +- `compressor_find_match_desktop.c` - Desktop-optimized match finding (included + by `compressor.c` on non-embedded targets) ## Development Commands @@ -162,38 +166,81 @@ make website-clean # Clean website build artifacts **WebAssembly Build Process:** 1. `wasm/Makefile` compiles C source to WebAssembly using Emscripten -2. `wasm/scripts/build.js` generates multiple JS/TS distribution formats +2. `tsup` (via `npm run build:js`) bundles into multiple JS/TS distribution + formats (CJS, ESM, `.d.ts`) 3. 
Exports specific C functions and runtime methods for JS interop -**Configuration Flags:** +**Configuration Flags (compile-time `-D` defines):** -- `TAMP_LAZY_MATCHING=1` - Enable lazy matching optimization (default) -- `TAMP_ESP32=1` - ESP32-specific optimizations +- `TAMP_LAZY_MATCHING=1` - Enable lazy matching optimization (default in + build.py) +- `TAMP_ESP32=1` - ESP32-specific optimizations (avoids bitfields for speed) - `TAMP_COMPRESSOR`/`TAMP_DECOMPRESSOR` - Include/exclude components +- `TAMP_EXTENDED=1` - Master switch for extended format: RLE and extended match + (default: 1). `TAMP_EXTENDED_COMPRESS` and `TAMP_EXTENDED_DECOMPRESS` can + individually override. +- `TAMP_STREAM=1` - Include stream API (default: 1). Disable with + `-DTAMP_STREAM=0` to save ~2.8KB. +- `TAMP_STREAM_WORK_BUFFER_SIZE=32` - Stack-allocated work buffer for stream API + (default: 32 bytes, 256+ recommended for performance) +- `TAMP_STREAM_MEMORY` / `TAMP_STREAM_STDIO` / `TAMP_STREAM_LITTLEFS` / + `TAMP_STREAM_FATFS` - Enable built-in I/O handlers for specific backends +- `TAMP_USE_EMBEDDED_MATCH=1` - Force embedded `find_best_match` implementation + on desktop (for testing) + +**Build Environment Variables (Python):** + +- `TAMP_SANITIZE=1` - Enable AddressSanitizer + UBSan +- `TAMP_PROFILE=1` - Enable profiling (line trace, debug info) +- `TAMP_USE_EMBEDDED_MATCH=1` - Force embedded match finding +- `TAMP_BUILD_C_EXTENSIONS=0` - Skip building C extensions entirely +- `CIBUILDWHEEL=1` - CI wheel building mode (disables allowed_to_fail) ### Testing Strategy **Multi-layered Testing:** -- **Python tests** (`tests/`) - Core algorithm testing using pytest +- **Python tests** (`tests/`) - Core algorithm testing using pytest. Includes + bit reader/writer, compressor, decompressor, round-trip, CLI, dataset + regression, and file interface tests. 
- **WebAssembly tests** (`wasm/test/`) - JS/TS API testing with Node.js test - runner + runner (`node --test`) - **C tests** (`ctests/`) - Low-level C API testing using Unity framework + (submodule at `ctests/Unity/`). Includes stream API tests and filesystem + integration tests with LittleFS and FatFS RAM backends. - **Integration tests** - Cross-platform compatibility and performance benchmarks **Test Data Sources:** -- Enwik8 dataset (100MB) for performance benchmarking -- Silesia corpus for compression ratio evaluation +- Enwik8 dataset (100MB) for performance benchmarking (`make download-enwik8`) +- Silesia corpus for compression ratio evaluation (`make download-silesia`) - Custom test cases for edge conditions +### Compressor Architecture + +The C compressor uses a two-phase low-level API: + +1. `tamp_compressor_sink()` - Copies input bytes into a 16-byte internal ring + buffer (cheap/fast) +2. `tamp_compressor_poll()` - Runs one compression iteration on the internal + buffer (computationally intensive) + +Higher-level convenience functions (`tamp_compressor_compress`, +`tamp_compressor_compress_and_flush`) wrap these. Callback variants (`_cb` +suffix) accept a `tamp_callback_t` progress callback. + +The stream API (`tamp_compress_stream`, `tamp_decompress_stream`) provides a +file-oriented interface using read/write callbacks, supporting multiple I/O +backends (memory, stdio, LittleFS, FatFS). + ### Memory Management Patterns **Key Principle:** Fixed memory usage during compression/decompression - Window size determines memory usage: `(1 << windowBits)` bytes - No dynamic allocation during compression/decompression operations +- Stream API uses a stack-allocated work buffer (`TAMP_STREAM_WORK_BUFFER_SIZE`) - Streaming interfaces require explicit resource management (`destroy()` calls in JS/TS) @@ -202,7 +249,9 @@ make website-clean # Clean website build artifacts ### Making Changes to Core Algorithm 1. **Modify C source** in `tamp/_c_src/tamp/` -2. 
**Rebuild all implementations:** +2. **Update pure Python reference** in `tamp/compressor.py` / + `tamp/decompressor.py` to match +3. **Rebuild all implementations:** ```bash # Python @@ -212,11 +261,12 @@ make website-clean # Clean website build artifacts cd wasm && npm run build ``` -3. **Run comprehensive tests:** +4. **Run comprehensive tests:** ```bash - make test # Python + MicroPython + poetry run pytest # Python tests + make c-test # C unit tests with sanitizers + make c-test-embedded # C tests with embedded match finding cd wasm && npm test # WebAssembly - make c-test # C unit tests ``` ### Adding New Features @@ -232,11 +282,13 @@ make website-clean # Clean website build artifacts - **Use provided benchmarking tools:** ```bash make on-device-compression-benchmark # MicroPython performance - npm run test:enwik8 # WebAssembly performance - python tools/performance-benchmark.sh # Python performance + cd wasm && npm run test:enwik8 # WebAssembly performance + bash tools/performance-benchmark.sh # Python performance + make c-benchmark-stream # C stream API benchmark + make binary-size # ARM binary size table ``` -- **Profile with:** `tools/profiler.py` for Python, browser dev tools for - WebAssembly +- **Profile with:** `tools/profiler.py` for Python (requires `TAMP_PROFILE=1`), + browser dev tools for WebAssembly ### Release Process @@ -247,6 +299,29 @@ make website-clean # Clean website build artifacts - WebAssembly npm package 3. **CI/CD handles** cross-platform builds and testing +### Python Import Fallback Chain + +`tamp/__init__.py` imports Compressor/Decompressor using this priority: + +1. Viper (MicroPython optimized) - only available on MicroPython +2. Cython C extensions (`_c_compressor`/`_c_decompressor`) - primary on CPython +3. Pure Python reference (`compressor.py`/`decompressor.py`) - fallback + +When modifying compression behavior, changes to the C source must be mirrored in +the pure Python reference implementation to keep them in sync. 
+ +### CI/CD + +GitHub Actions workflows (`.github/workflows/`): + +- `tests.yaml` - Lint (ruff, pre-commit) and test across Python 3.9/3.12/3.13 + and multiple OS. Also runs `c-test` and `c-test-embedded`. +- `build_wheels.yaml` - Cross-platform wheel builds via cibuildwheel +- `javascript.yaml` - WebAssembly tests on Node 18/20 +- `mpy_native_module.yaml` - MicroPython native module builds for ARM + architectures +- `esp_upload_component.yml` - ESP-IDF component registry upload + ## Documentation Style - Avoid "fake" subsections (e.g., bold text like `**Error Promotion:**` acting From 238ec2dc70352dd9ae7db01154ad65aff65f650e Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 19:54:17 -0500 Subject: [PATCH 100/109] note: always inline refill_bit_buffer. --- tamp/_c_src/tamp/decompressor.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index 23ab4ed1..3b9a568a 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -316,6 +316,9 @@ tamp_res tamp_decompressor_init(TampDecompressor* decompressor, const TampConf* * @brief Refill bit buffer from input stream. * * Consumes bytes from input until bit_buffer has at least 25 bits or input is exhausted. + * + * NOTE: NOINLINE saves ~192 bytes on armv6m but causes ~10% decompression + * speed regression. Keep this inlined for performance. */ static inline void refill_bit_buffer(TampDecompressor* d, const unsigned char** input, const unsigned char* input_end, size_t* input_consumed_size) { From 1ab913e0ea2abe1e642c485f57626bf5eaada111 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 19:57:02 -0500 Subject: [PATCH 101/109] further flushing optimization. 
--- tamp/_c_src/tamp/compressor.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 6181c5bd..f080809e 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -222,8 +222,10 @@ static TAMP_NOINLINE void write_extended_huffman(TampCompressor* compressor, uin /** * @brief Get the last byte written to the window. + * + * NOINLINE: called from 3 sites; outlining saves ~44 bytes on armv6m. */ -static inline uint8_t get_last_window_byte(TampCompressor* compressor) { +static TAMP_NOINLINE uint8_t get_last_window_byte(TampCompressor* compressor) { uint16_t prev_pos = (compressor->window_pos - 1) & ((1 << compressor->conf.window) - 1); return compressor->window[prev_pos]; } @@ -688,17 +690,17 @@ tamp_res tamp_compressor_flush(TampCompressor* compressor, unsigned char* output // At this point, up to 16 bits may remain in the compressor->bit_buffer // The output buffer may have 0 bytes remaining. 
- // Flush the remainder of the output bit-buffer - while (compressor->bit_buffer_pos) { + // Flush whole bytes, then write trailing partial byte + res = partial_flush(compressor, &output, &output_size, output_written_size); + if (compressor->bit_buffer_pos) { if (TAMP_UNLIKELY(output_size == 0)) return TAMP_OUTPUT_FULL; - *output++ = compressor->bit_buffer >> 24; - compressor->bit_buffer <<= 8; - compressor->bit_buffer_pos -= MIN(compressor->bit_buffer_pos, 8); - output_size--; + *output = compressor->bit_buffer >> 24; (*output_written_size)++; + compressor->bit_buffer_pos = 0; + compressor->bit_buffer = 0; } - return TAMP_OK; + return res; } tamp_res tamp_compressor_compress_and_flush_cb(TampCompressor* compressor, unsigned char* output, size_t output_size, From 627cb38a2b2895832e00892d18dcea83601befa7 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 20:19:10 -0500 Subject: [PATCH 102/109] TAMP_OPTIMIZE_SIZE macro --- tamp/_c_src/tamp/common.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tamp/_c_src/tamp/common.h b/tamp/_c_src/tamp/common.h index 706ce0fa..0e5bec7e 100644 --- a/tamp/_c_src/tamp/common.h +++ b/tamp/_c_src/tamp/common.h @@ -42,12 +42,19 @@ extern "C" { #if defined(_MSC_VER) #define TAMP_ALWAYS_INLINE __forceinline #define TAMP_NOINLINE __declspec(noinline) -#elif defined(__GNUC__) || defined(__clang__) +#define TAMP_OPTIMIZE_SIZE /* not supported */ +#elif defined(__GNUC__) && !defined(__clang__) #define TAMP_ALWAYS_INLINE inline __attribute__((always_inline)) #define TAMP_NOINLINE __attribute__((noinline)) +#define TAMP_OPTIMIZE_SIZE __attribute__((optimize("Os"))) +#elif defined(__clang__) +#define TAMP_ALWAYS_INLINE inline __attribute__((always_inline)) +#define TAMP_NOINLINE __attribute__((noinline)) +#define TAMP_OPTIMIZE_SIZE /* clang doesn't support per-function optimize */ #else #define TAMP_ALWAYS_INLINE inline #define TAMP_NOINLINE +#define TAMP_OPTIMIZE_SIZE #endif /* Include stream API 
(tamp_compress_stream, tamp_decompress_stream). From 95c770a77921b6f0e2fb010b592f34c771bcf72c Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 20:29:45 -0500 Subject: [PATCH 103/109] extract out extended bits to its own private polling function. --- tamp/_c_src/tamp/compressor.c | 207 +++++++++++++++++----------------- 1 file changed, 102 insertions(+), 105 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index f080809e..a9be3145 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -24,6 +24,10 @@ #define FLUSH_CODE (0xAB) +// Internal return value for poll_extended_handling: signals caller to +// proceed with normal pattern matching rather than returning immediately. +#define TAMP_POLL_CONTINUE ((tamp_res)127) + // encodes [min_pattern_bytes, min_pattern_bytes + 13] pattern lengths static const uint8_t huffman_codes[] = {0x0, 0x3, 0x8, 0xb, 0x14, 0x24, 0x26, 0x2b, 0x4b, 0x54, 0x94, 0x95, 0xaa, 0x27}; // These bit lengths pre-add the 1 bit for the 0-value is_literal flag. @@ -237,14 +241,18 @@ static TAMP_NOINLINE uint8_t get_last_window_byte(TampCompressor* compressor) { * starting from current_pos. Returns the longest match found (which may be at * current_pos itself if O(1) extension works, or at a different position). * + * NOINLINE + Os: Called only during extended match continuation (rare path). + * Outlining saves ~100 bytes in poll on armv6m. 
+ * * @param[in] compressor TampCompressor object * @param[in] current_pos Current match position in window (also search start) * @param[in] current_count Current match length * @param[out] new_pos Position of found match (only valid if new_count > current_count) * @param[out] new_count Length of found match */ -static inline void find_extended_match(TampCompressor* compressor, uint16_t current_pos, uint8_t current_count, - uint16_t* new_pos, uint8_t* new_count) { +static TAMP_NOINLINE TAMP_OPTIMIZE_SIZE void find_extended_match(TampCompressor* compressor, uint16_t current_pos, + uint8_t current_count, uint16_t* new_pos, + uint8_t* new_count) { // Preconditions (guaranteed by caller): // - input_size > 0 // - current_pos + current_count < WINDOW_SIZE @@ -353,6 +361,92 @@ static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor* compres return TAMP_OK; } + +/** + * @brief Handle all extended-specific logic in poll (match continuation + RLE). + * + * NOINLINE + Os: Extended paths are rarely executed. Outlining from poll saves + * significant code size on register-constrained Cortex-M0+ where the compiler + * otherwise spills heavily to stack (~48 bytes saved on armv6m). + * + * @return TAMP_OK if fully handled (caller should return TAMP_OK), + * TAMP_POLL_CONTINUE if caller should proceed to normal pattern matching, + * other tamp_res on error. 
+ */ +static TAMP_NOINLINE TAMP_OPTIMIZE_SIZE tamp_res poll_extended_handling(TampCompressor* compressor, + unsigned char** output, size_t* output_size, + size_t* output_written_size) { + // Handle extended match continuation + if (compressor->extended_match_count) { + const uint8_t max_ext_match = compressor->min_pattern_size + 11 + EXTENDED_MATCH_MAX_EXTRA; + + while (compressor->input_size > 0) { + const uint16_t current_pos = compressor->extended_match_position; + const uint8_t current_count = compressor->extended_match_count; + + if (current_pos + current_count >= WINDOW_SIZE || current_count >= max_ext_match) { + return write_extended_match_token(compressor, output, output_size, output_written_size); + } + + uint16_t new_pos; + uint8_t new_count; + find_extended_match(compressor, current_pos, current_count, &new_pos, &new_count); + + if (new_count > current_count) { + uint8_t extra_bytes = new_count - current_count; + compressor->extended_match_position = new_pos; + compressor->extended_match_count = new_count; + compressor->input_pos = input_add(extra_bytes); + compressor->input_size -= extra_bytes; + continue; + } + + return write_extended_match_token(compressor, output, output_size, output_written_size); + } + return TAMP_OK; + } + + // Handle RLE accumulation + uint8_t last_byte = get_last_window_byte(compressor); + + uint8_t rle_available = 0; + while (rle_available < compressor->input_size && compressor->rle_count + rle_available < RLE_MAX_COUNT && + compressor->input[input_add(rle_available)] == last_byte) { + rle_available++; + } + + uint8_t total_rle = compressor->rle_count + rle_available; + bool rle_ended = (rle_available < compressor->input_size) || (total_rle >= RLE_MAX_COUNT); + + if (!rle_ended && total_rle > 0) { + compressor->rle_count = total_rle; + compressor->input_pos = input_add(rle_available); + compressor->input_size -= rle_available; + return TAMP_OK; + } + + if (total_rle >= 2) { + if (total_rle == rle_available && total_rle <= 6) { 
+ uint16_t pattern_index; + uint8_t pattern_size; + find_best_match(compressor, &pattern_index, &pattern_size); + + if (pattern_size > total_rle) { + compressor->rle_count = 0; + return TAMP_POLL_CONTINUE; // Proceed to pattern matching + } + } + + compressor->input_pos = input_add(rle_available); + compressor->input_size -= rle_available; + write_rle_token(compressor, total_rle); + compressor->rle_count = 0; + return TAMP_OK; + } + + if (total_rle == 1) compressor->rle_count = 0; + return TAMP_POLL_CONTINUE; // Proceed to pattern matching +} #endif // TAMP_EXTENDED_COMPRESS TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor* compressor, unsigned char* output, size_t output_size, @@ -362,9 +456,6 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor* compressor, unsigned const uint8_t conf_window = compressor->conf.window; const uint8_t conf_literal = compressor->conf.literal; const uint16_t window_mask = (1 << conf_window) - 1; -#if TAMP_EXTENDED_COMPRESS - const bool conf_extended = compressor->conf.extended; -#endif size_t output_written_size_proxy; if (!output_written_size) output_written_size = &output_written_size_proxy; @@ -382,105 +473,11 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor* compressor, unsigned uint16_t match_index = 0; #if TAMP_EXTENDED_COMPRESS - if (TAMP_UNLIKELY(conf_extended)) { - // Handle extended match continuation - if (compressor->extended_match_count) { - // We're in extended match mode - try to extend the match - const uint8_t max_ext_match = compressor->min_pattern_size + 11 + EXTENDED_MATCH_MAX_EXTRA; - - while (compressor->input_size > 0) { - const uint16_t current_pos = compressor->extended_match_position; - const uint8_t current_count = compressor->extended_match_count; - - // Check if extending would go beyond window buffer boundary (no wrap-around) - if (current_pos + current_count >= WINDOW_SIZE) { - res = write_extended_match_token(compressor, &output, &output_size, output_written_size); - if 
(TAMP_UNLIKELY(res != TAMP_OK)) return res; - return TAMP_OK; - } - - // Check if we've reached max extended match size - if (current_count >= max_ext_match) { - res = write_extended_match_token(compressor, &output, &output_size, output_written_size); - if (TAMP_UNLIKELY(res != TAMP_OK)) return res; - return TAMP_OK; - } - - // Search for longer match (includes O(1) extension at same position) - uint16_t new_pos; - uint8_t new_count; - find_extended_match(compressor, current_pos, current_count, &new_pos, &new_count); - - if (new_count > current_count) { - // Found longer match - update and continue - uint8_t extra_bytes = new_count - current_count; - compressor->extended_match_position = new_pos; - compressor->extended_match_count = new_count; - compressor->input_pos = input_add(extra_bytes); - compressor->input_size -= extra_bytes; - continue; - } - - // No longer match found - emit current match - res = write_extended_match_token(compressor, &output, &output_size, output_written_size); - if (TAMP_UNLIKELY(res != TAMP_OK)) return res; - return TAMP_OK; - } - // Ran out of input while extending - return and wait for more - return TAMP_OK; - } - - // Handle RLE accumulation with persistent state - uint8_t last_byte = get_last_window_byte(compressor); - - // Count RLE bytes in current buffer WITHOUT consuming yet - uint8_t rle_available = 0; - while (rle_available < compressor->input_size && compressor->rle_count + rle_available < RLE_MAX_COUNT && - compressor->input[input_add(rle_available)] == last_byte) { - rle_available++; - } - - uint8_t total_rle = compressor->rle_count + rle_available; - bool rle_ended = (rle_available < compressor->input_size) || (total_rle >= RLE_MAX_COUNT); - - // If RLE hasn't ended and we haven't hit max, consume and wait for more - if (!rle_ended && total_rle > 0) { - compressor->rle_count = total_rle; - compressor->input_pos = input_add(rle_available); - compressor->input_size -= rle_available; - return TAMP_OK; - } - - // RLE run has 
ended - decide between RLE and pattern match - if (total_rle >= 2) { - bool use_pattern = false; - - // For short RLE runs (all from this call), check if pattern match is better - if (total_rle == rle_available && total_rle <= 6) { - uint16_t pattern_index; - uint8_t pattern_size; - find_best_match(compressor, &pattern_index, &pattern_size); - - if (pattern_size > total_rle) { - use_pattern = true; - // Don't consume RLE bytes - fall through to pattern matching - } - } - - if (!use_pattern) { - // Use RLE - consume bytes and write token - compressor->input_pos = input_add(rle_available); - compressor->input_size -= rle_available; - write_rle_token(compressor, total_rle); - compressor->rle_count = 0; - return TAMP_OK; - } - compressor->rle_count = 0; - } else if (total_rle == 1) { - // Single byte - not worth RLE, will be handled as literal/pattern - // Byte is still in input buffer (not consumed), just reset RLE state - compressor->rle_count = 0; - } + if (TAMP_UNLIKELY(compressor->conf.extended)) { + // Handle extended match continuation + RLE (outlined for code size) + res = poll_extended_handling(compressor, &output, &output_size, output_written_size); + if (res != TAMP_POLL_CONTINUE) return res; + // TAMP_POLL_CONTINUE: proceed to pattern matching below } #endif // TAMP_EXTENDED_COMPRESS @@ -544,7 +541,7 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor* compressor, unsigned } else { #if TAMP_EXTENDED_COMPRESS // Extended: Start extended match continuation - if (conf_extended && match_size > compressor->min_pattern_size + 11) { + if (compressor->conf.extended && match_size > compressor->min_pattern_size + 11) { compressor->extended_match_count = match_size; compressor->extended_match_position = match_index; // Consume matched bytes from input From b31ce735f186f1e35e5e1b1949e70ec3d0c4df4b Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Thu, 5 Feb 2026 20:54:13 -0500 Subject: [PATCH 104/109] more TAMP_OPTIMIZE_SIZE attributes --- 
tamp/_c_src/tamp/common.c | 4 ++-- tamp/_c_src/tamp/compressor.c | 36 +++++++++++++++++++-------------- tamp/_c_src/tamp/decompressor.c | 14 +++++++------ 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/tamp/_c_src/tamp/common.c b/tamp/_c_src/tamp/common.c index b1ef1b9b..be0099a1 100644 --- a/tamp/_c_src/tamp/common.c +++ b/tamp/_c_src/tamp/common.c @@ -24,7 +24,7 @@ static inline uint32_t xorshift32(uint32_t *state) { return x; } -void tamp_initialize_dictionary(unsigned char *buffer, size_t size) { +TAMP_OPTIMIZE_SIZE void tamp_initialize_dictionary(unsigned char *buffer, size_t size) { uint32_t seed = 3758097560; // This was experimentally discovered with tools/find_seed.py uint32_t randbuf = 0; for (size_t i = 0; i < size; i++) { @@ -34,7 +34,7 @@ void tamp_initialize_dictionary(unsigned char *buffer, size_t size) { } } -int8_t tamp_compute_min_pattern_size(uint8_t window, uint8_t literal) { +TAMP_OPTIMIZE_SIZE int8_t tamp_compute_min_pattern_size(uint8_t window, uint8_t literal) { return 2 + (window > (10 + ((literal - 5) << 1))); } diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index a9be3145..0fbb7780 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -60,8 +60,8 @@ static TAMP_NOINLINE void write_to_bit_buffer(TampCompressor* compressor, uint32 * @param[in,out] output_written_size Bytes written (accumulated). * @return TAMP_OK on success, TAMP_OUTPUT_FULL if output buffer is too small. 
*/ -static TAMP_NOINLINE tamp_res partial_flush(TampCompressor* compressor, unsigned char** output, size_t* output_size, - size_t* output_written_size) { +static TAMP_NOINLINE TAMP_OPTIMIZE_SIZE tamp_res partial_flush(TampCompressor* compressor, unsigned char** output, + size_t* output_size, size_t* output_written_size) { while (compressor->bit_buffer_pos >= 8 && *output_size) { *(*output)++ = compressor->bit_buffer >> 24; (*output_size)--; @@ -165,7 +165,8 @@ static inline bool validate_no_match_overlap(uint16_t write_pos, uint16_t match_ } #endif -tamp_res tamp_compressor_init(TampCompressor* compressor, const TampConf* conf, unsigned char* window) { +TAMP_OPTIMIZE_SIZE tamp_res tamp_compressor_init(TampCompressor* compressor, const TampConf* conf, + unsigned char* window) { const TampConf conf_default = { .window = 10, .literal = 8, @@ -217,7 +218,8 @@ tamp_res tamp_compressor_init(TampCompressor* compressor, const TampConf* conf, * @param[in] value The value to encode. * @param[in] trailing_bits Number of trailing bits (3 for extended match, 4 for RLE). */ -static TAMP_NOINLINE void write_extended_huffman(TampCompressor* compressor, uint8_t value, uint8_t trailing_bits) { +static TAMP_NOINLINE TAMP_OPTIMIZE_SIZE void write_extended_huffman(TampCompressor* compressor, uint8_t value, + uint8_t trailing_bits) { uint8_t code_index = value >> trailing_bits; // Write huffman code (without literal flag) + trailing bits in one call write_to_bit_buffer(compressor, (huffman_codes[code_index] << trailing_bits) | (value & ((1 << trailing_bits) - 1)), @@ -229,7 +231,7 @@ static TAMP_NOINLINE void write_extended_huffman(TampCompressor* compressor, uin * * NOINLINE: called from 3 sites; outlining saves ~44 bytes on armv6m. 
*/ -static TAMP_NOINLINE uint8_t get_last_window_byte(TampCompressor* compressor) { +static TAMP_NOINLINE TAMP_OPTIMIZE_SIZE uint8_t get_last_window_byte(TampCompressor* compressor) { uint16_t prev_pos = (compressor->window_pos - 1) & ((1 << compressor->conf.window) - 1); return compressor->window[prev_pos]; } @@ -583,9 +585,11 @@ void tamp_compressor_sink(TampCompressor* compressor, const unsigned char* input } } -tamp_res tamp_compressor_compress_cb(TampCompressor* compressor, unsigned char* output, size_t output_size, - size_t* output_written_size, const unsigned char* input, size_t input_size, - size_t* input_consumed_size, tamp_callback_t callback, void* user_data) { +TAMP_OPTIMIZE_SIZE tamp_res tamp_compressor_compress_cb(TampCompressor* compressor, unsigned char* output, + size_t output_size, size_t* output_written_size, + const unsigned char* input, size_t input_size, + size_t* input_consumed_size, tamp_callback_t callback, + void* user_data) { tamp_res res; size_t input_consumed_size_proxy = 0, output_written_size_proxy = 0; size_t total_input_size = input_size; @@ -700,10 +704,11 @@ tamp_res tamp_compressor_flush(TampCompressor* compressor, unsigned char* output return res; } -tamp_res tamp_compressor_compress_and_flush_cb(TampCompressor* compressor, unsigned char* output, size_t output_size, - size_t* output_written_size, const unsigned char* input, - size_t input_size, size_t* input_consumed_size, bool write_token, - tamp_callback_t callback, void* user_data) { +TAMP_OPTIMIZE_SIZE tamp_res tamp_compressor_compress_and_flush_cb(TampCompressor* compressor, unsigned char* output, + size_t output_size, size_t* output_written_size, + const unsigned char* input, size_t input_size, + size_t* input_consumed_size, bool write_token, + tamp_callback_t callback, void* user_data) { tamp_res res; size_t flush_size; size_t output_written_size_proxy; @@ -726,9 +731,10 @@ tamp_res tamp_compressor_compress_and_flush_cb(TampCompressor* compressor, unsig #if TAMP_STREAM 
-tamp_res tamp_compress_stream(TampCompressor* compressor, tamp_read_t read_cb, void* read_handle, tamp_write_t write_cb, - void* write_handle, size_t* input_consumed_size, size_t* output_written_size, - tamp_callback_t callback, void* user_data) { +TAMP_OPTIMIZE_SIZE tamp_res tamp_compress_stream(TampCompressor* compressor, tamp_read_t read_cb, void* read_handle, + tamp_write_t write_cb, void* write_handle, size_t* input_consumed_size, + size_t* output_written_size, tamp_callback_t callback, + void* user_data) { size_t input_consumed_size_proxy, output_written_size_proxy; if (!input_consumed_size) input_consumed_size = &input_consumed_size_proxy; if (!output_written_size) output_written_size = &output_written_size_proxy; diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index 3b9a568a..016c3a05 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -273,9 +273,10 @@ tamp_res tamp_decompressor_read_header(TampConf* conf, const unsigned char* inpu * * window * * window_bits_max */ -static tamp_res tamp_decompressor_populate_from_conf(TampDecompressor* decompressor, uint8_t conf_window, - uint8_t conf_literal, uint8_t conf_use_custom_dictionary, - uint8_t conf_extended) { +static TAMP_OPTIMIZE_SIZE tamp_res tamp_decompressor_populate_from_conf(TampDecompressor* decompressor, + uint8_t conf_window, uint8_t conf_literal, + uint8_t conf_use_custom_dictionary, + uint8_t conf_extended) { if (conf_window < 8 || conf_window > 15) return TAMP_INVALID_CONF; if (conf_literal < 5 || conf_literal > 8) return TAMP_INVALID_CONF; if (conf_window > decompressor->window_bits_max) return TAMP_INVALID_CONF; @@ -511,9 +512,10 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor* decompressor, unsigne #if TAMP_STREAM -tamp_res tamp_decompress_stream(TampDecompressor* decompressor, tamp_read_t read_cb, void* read_handle, - tamp_write_t write_cb, void* write_handle, size_t* input_consumed_size, - size_t* 
output_written_size, tamp_callback_t callback, void* user_data) { +TAMP_OPTIMIZE_SIZE tamp_res tamp_decompress_stream(TampDecompressor* decompressor, tamp_read_t read_cb, + void* read_handle, tamp_write_t write_cb, void* write_handle, + size_t* input_consumed_size, size_t* output_written_size, + tamp_callback_t callback, void* user_data) { size_t input_consumed_size_proxy, output_written_size_proxy; if (!input_consumed_size) input_consumed_size = &input_consumed_size_proxy; if (!output_written_size) output_written_size = &output_written_size_proxy; From cde27cf52db3e343395f958cde395d35d93a3a06 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Fri, 6 Feb 2026 08:58:11 -0500 Subject: [PATCH 105/109] some more gcc pragmas to shrink implementation --- tamp/_c_src/tamp/compressor.c | 21 +++++++++++++++++++++ tamp/_c_src/tamp/decompressor.c | 7 +++++++ 2 files changed, 28 insertions(+) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 0fbb7780..5d4bc12e 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -327,6 +327,10 @@ static TAMP_NOINLINE void write_rle_token(TampCompressor* compressor, uint8_t co * @param[in,out] output_written_size Bytes written (accumulated). * @return TAMP_OK on success, TAMP_OUTPUT_FULL if output buffer is too small. */ +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC push_options +#pragma GCC optimize("-fno-reorder-blocks") +#endif static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor* compressor, unsigned char** output, size_t* output_size, size_t* output_written_size) { // Pre-check output space to prevent OUTPUT_FULL mid-token (would corrupt bit_buffer) @@ -363,6 +367,9 @@ static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor* compres return TAMP_OK; } +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC pop_options +#endif /** * @brief Handle all extended-specific logic in poll (match continuation + RLE). 
@@ -451,6 +458,10 @@ static TAMP_NOINLINE TAMP_OPTIMIZE_SIZE tamp_res poll_extended_handling(TampComp } #endif // TAMP_EXTENDED_COMPRESS +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC push_options +#pragma GCC optimize("-fno-schedule-insns2") +#endif TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor* compressor, unsigned char* output, size_t output_size, size_t* output_written_size) { tamp_res res; @@ -568,6 +579,9 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor* compressor, unsigned return TAMP_OK; } +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC pop_options +#endif void tamp_compressor_sink(TampCompressor* compressor, const unsigned char* input, size_t input_size, size_t* consumed_size) { @@ -628,6 +642,10 @@ TAMP_OPTIMIZE_SIZE tamp_res tamp_compressor_compress_cb(TampCompressor* compress return TAMP_OK; } +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC push_options +#pragma GCC optimize("-fno-tree-pre") +#endif tamp_res tamp_compressor_flush(TampCompressor* compressor, unsigned char* output, size_t output_size, size_t* output_written_size, bool write_token) { tamp_res res; @@ -703,6 +721,9 @@ tamp_res tamp_compressor_flush(TampCompressor* compressor, unsigned char* output return res; } +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC pop_options +#endif TAMP_OPTIMIZE_SIZE tamp_res tamp_compressor_compress_and_flush_cb(TampCompressor* compressor, unsigned char* output, size_t output_size, size_t* output_written_size, diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index 016c3a05..4d312790 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -331,6 +331,10 @@ static inline void refill_bit_buffer(TampDecompressor* d, const unsigned char** } } +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC push_options +#pragma GCC optimize("-fno-tree-pre") +#endif tamp_res tamp_decompressor_decompress_cb(TampDecompressor* decompressor, 
unsigned char* output, size_t output_size, size_t* output_written_size, const unsigned char* input, size_t input_size, size_t* input_consumed_size, tamp_callback_t callback, void* user_data) { @@ -509,6 +513,9 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor* decompressor, unsigne } return TAMP_INPUT_EXHAUSTED; } +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC pop_options +#endif #if TAMP_STREAM From 1d82747ad90abaf4e8453758e7a471c04487b073 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Fri, 6 Feb 2026 15:09:55 -0500 Subject: [PATCH 106/109] update readme binary-size table --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2a722f01..2f338fc7 100644 --- a/README.md +++ b/README.md @@ -352,10 +352,10 @@ Numbers reported in bytes. Tamp sizes were measured using `arm-none-eabi-gcc` | | Compressor | Decompressor | Compressor + Decompressor | | -------------------------------- | ---------- | ------------ | ------------------------- | | Tamp (MicroPython Native) | 4708 | 4339 | 8124 | -| Tamp (C, no extended, no stream) | 1620 | 1584 | 3000 | -| Tamp (C, no extended) | 2024 | 2036 | 3856 | -| Tamp (C, extended, no stream) | 3072 | 2436 | 5304 | -| Tamp (C, extended) | 3476 | 2888 | 6160 | +| Tamp (C, no extended, no stream) | 1466 | 1312 | 2592 | +| Tamp (C, no extended) | 1748 | 1550 | 3112 | +| Tamp (C, extended, no stream) | 2558 | 2072 | 4444 | +| Tamp (C, extended) | 2840 | 2310 | 4964 | | Heatshrink (C) | 2956 | 3876 | 6832 | | uzlib (C) | 2355 | 3963 | 6318 | From c52f165f65ea9b366998e8f507c9b3cc5840b7d3 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Fri, 6 Feb 2026 15:36:54 -0500 Subject: [PATCH 107/109] avoid memset --- tamp/_c_src/tamp/compressor.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 5d4bc12e..432d97e2 100644 --- a/tamp/_c_src/tamp/compressor.c +++ 
b/tamp/_c_src/tamp/compressor.c @@ -185,8 +185,10 @@ TAMP_OPTIMIZE_SIZE tamp_res tamp_compressor_init(TampCompressor* compressor, con if (conf->extended) return TAMP_INVALID_CONF; // Extended requested but not compiled in #endif - for (uint8_t i = 0; i < sizeof(TampCompressor); i++) // Zero-out the struct - ((unsigned char*)compressor)[i] = 0; + { // volatile prevents any GCC version/target from converting this loop to a memset call + volatile unsigned char* p = (volatile unsigned char*)compressor; + for (uint8_t i = 0; i < sizeof(TampCompressor); i++) p[i] = 0; + } // Build header directly from conf (8 bits total) // Layout: [window:3][literal:2][use_custom_dictionary:1][extended:1][more_headers:1] From bf1c3772fb4c97352f36021f2c496dfc81e9ca70 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sun, 8 Feb 2026 21:08:58 -0500 Subject: [PATCH 108/109] TAMP_USE_MEMSET macro --- Makefile | 2 +- tamp/_c_src/tamp/common.h | 19 +++++++++++++++++++ tamp/_c_src/tamp/compressor.c | 5 +---- tamp/_c_src/tamp/decompressor.c | 8 +++----- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 9fd03d22..eb8d8255 100644 --- a/Makefile +++ b/Makefile @@ -75,7 +75,7 @@ MOD = tamp # Override -Os with -O2 for better performance (last flag wins) CFLAGS_EXTRA = -O2 -CFLAGS += -Itamp/_c_src -DTAMP_COMPRESSOR=$(TAMP_COMPRESSOR) -DTAMP_DECOMPRESSOR=$(TAMP_DECOMPRESSOR) -DTAMP_STREAM=0 +CFLAGS += -Itamp/_c_src -DTAMP_COMPRESSOR=$(TAMP_COMPRESSOR) -DTAMP_DECOMPRESSOR=$(TAMP_DECOMPRESSOR) -DTAMP_STREAM=0 -DTAMP_USE_MEMSET=0 # Compiler-specific flags based on target architecture ifeq ($(filter $(ARCH),x86 x64),) # Cross-compiling for embedded (ARM, xtensa) - use GCC flags diff --git a/tamp/_c_src/tamp/common.h b/tamp/_c_src/tamp/common.h index 0e5bec7e..984411b4 100644 --- a/tamp/_c_src/tamp/common.h +++ b/tamp/_c_src/tamp/common.h @@ -57,6 +57,25 @@ extern "C" { #define TAMP_OPTIMIZE_SIZE #endif +/* TAMP_USE_MEMSET: Use libc memset (default: 1). 
+ * Set to 0 for environments without libc (e.g. MicroPython native modules). + * When disabled, uses a volatile loop that prevents GCC from emitting a + * memset call at the cost of inhibiting store coalescing. */ +#ifndef TAMP_USE_MEMSET +#define TAMP_USE_MEMSET 1 +#endif + +#if TAMP_USE_MEMSET +#include <string.h> +#define TAMP_MEMSET(dst, val, n) memset((dst), (val), (n)) +#else +#define TAMP_MEMSET(dst, val, n) \ + do { \ + volatile unsigned char *_tamp_p = (volatile unsigned char *)(dst); \ + for (size_t _tamp_i = 0; _tamp_i < (n); _tamp_i++) _tamp_p[_tamp_i] = (val); \ + } while (0) +#endif + /* Include stream API (tamp_compress_stream, tamp_decompress_stream). * Enabled by default. Disable with -DTAMP_STREAM=0 to save ~2.8KB. */ diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 432d97e2..78527497 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -185,10 +185,7 @@ TAMP_OPTIMIZE_SIZE tamp_res tamp_compressor_init(TampCompressor* compressor, con if (conf->extended) return TAMP_INVALID_CONF; // Extended requested but not compiled in #endif - { // volatile prevents any GCC version/target from converting this loop to a memset call - volatile unsigned char* p = (volatile unsigned char*)compressor; - for (uint8_t i = 0; i < sizeof(TampCompressor); i++) p[i] = 0; - } + TAMP_MEMSET(compressor, 0, sizeof(TampCompressor)); // Build header directly from conf (8 bits total) // Layout: [window:3][literal:2][use_custom_dictionary:1][extended:1][more_headers:1] diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index 4d312790..70db78c2 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -131,9 +131,8 @@ static tamp_res decode_rle(TampDecompressor* d, unsigned char** output, const un } /* Write repeated bytes to output */ - for (uint8_t i = 0; i < to_write; i++) { - *(*output)++ = symbol; - } + TAMP_MEMSET(*output, symbol, to_write); + *output += to_write; 
*output_written_size += to_write; /* Update window only on first chunk (skip==0). @@ -301,8 +300,7 @@ tamp_res tamp_decompressor_init(TampDecompressor* decompressor, const TampConf* // Validate window_bits parameter if (window_bits < 8 || window_bits > 15) return TAMP_INVALID_CONF; - for (uint8_t i = 0; i < sizeof(TampDecompressor); i++) // Zero-out the struct - ((unsigned char*)decompressor)[i] = 0; + TAMP_MEMSET(decompressor, 0, sizeof(TampDecompressor)); decompressor->window = window; decompressor->window_bits_max = window_bits; if (conf) { From 9a66abf319acea473f22cdbba3391ab3893db543 Mon Sep 17 00:00:00 2001 From: Brian Pugh Date: Sun, 8 Feb 2026 21:51:03 -0500 Subject: [PATCH 109/109] xtensa-specific nonsense --- tamp/_c_src/tamp/common.h | 12 ++++++++++++ tamp/_c_src/tamp/compressor.c | 12 ++++++------ tamp/_c_src/tamp/decompressor.c | 4 ++-- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/tamp/_c_src/tamp/common.h b/tamp/_c_src/tamp/common.h index 984411b4..11006f43 100644 --- a/tamp/_c_src/tamp/common.h +++ b/tamp/_c_src/tamp/common.h @@ -39,6 +39,14 @@ extern "C" { #define TAMP_UNLIKELY(c) (c) #endif +/* Per-function optimize attributes and #pragma GCC push/pop_options require + * GCC on a target that supports them. Xtensa GCC does not. 
*/ +#if defined(__GNUC__) && !defined(__clang__) && !defined(__XTENSA__) +#define TAMP_HAS_GCC_OPTIMIZE 1 +#else +#define TAMP_HAS_GCC_OPTIMIZE 0 +#endif + #if defined(_MSC_VER) #define TAMP_ALWAYS_INLINE __forceinline #define TAMP_NOINLINE __declspec(noinline) @@ -46,7 +54,11 @@ extern "C" { #elif defined(__GNUC__) && !defined(__clang__) #define TAMP_ALWAYS_INLINE inline __attribute__((always_inline)) #define TAMP_NOINLINE __attribute__((noinline)) +#if TAMP_HAS_GCC_OPTIMIZE #define TAMP_OPTIMIZE_SIZE __attribute__((optimize("Os"))) +#else +#define TAMP_OPTIMIZE_SIZE +#endif #elif defined(__clang__) #define TAMP_ALWAYS_INLINE inline __attribute__((always_inline)) #define TAMP_NOINLINE __attribute__((noinline)) diff --git a/tamp/_c_src/tamp/compressor.c b/tamp/_c_src/tamp/compressor.c index 78527497..97760e2d 100644 --- a/tamp/_c_src/tamp/compressor.c +++ b/tamp/_c_src/tamp/compressor.c @@ -326,7 +326,7 @@ static TAMP_NOINLINE void write_rle_token(TampCompressor* compressor, uint8_t co * @param[in,out] output_written_size Bytes written (accumulated). * @return TAMP_OK on success, TAMP_OUTPUT_FULL if output buffer is too small. 
*/ -#if defined(__GNUC__) && !defined(__clang__) +#if TAMP_HAS_GCC_OPTIMIZE #pragma GCC push_options #pragma GCC optimize("-fno-reorder-blocks") #endif @@ -366,7 +366,7 @@ static TAMP_NOINLINE tamp_res write_extended_match_token(TampCompressor* compres return TAMP_OK; } -#if defined(__GNUC__) && !defined(__clang__) +#if TAMP_HAS_GCC_OPTIMIZE #pragma GCC pop_options #endif @@ -457,7 +457,7 @@ static TAMP_NOINLINE TAMP_OPTIMIZE_SIZE tamp_res poll_extended_handling(TampComp } #endif // TAMP_EXTENDED_COMPRESS -#if defined(__GNUC__) && !defined(__clang__) +#if TAMP_HAS_GCC_OPTIMIZE #pragma GCC push_options #pragma GCC optimize("-fno-schedule-insns2") #endif @@ -578,7 +578,7 @@ TAMP_NOINLINE tamp_res tamp_compressor_poll(TampCompressor* compressor, unsigned return TAMP_OK; } -#if defined(__GNUC__) && !defined(__clang__) +#if TAMP_HAS_GCC_OPTIMIZE #pragma GCC pop_options #endif @@ -641,7 +641,7 @@ TAMP_OPTIMIZE_SIZE tamp_res tamp_compressor_compress_cb(TampCompressor* compress return TAMP_OK; } -#if defined(__GNUC__) && !defined(__clang__) +#if TAMP_HAS_GCC_OPTIMIZE #pragma GCC push_options #pragma GCC optimize("-fno-tree-pre") #endif @@ -720,7 +720,7 @@ tamp_res tamp_compressor_flush(TampCompressor* compressor, unsigned char* output return res; } -#if defined(__GNUC__) && !defined(__clang__) +#if TAMP_HAS_GCC_OPTIMIZE #pragma GCC pop_options #endif diff --git a/tamp/_c_src/tamp/decompressor.c b/tamp/_c_src/tamp/decompressor.c index 70db78c2..92af94c6 100644 --- a/tamp/_c_src/tamp/decompressor.c +++ b/tamp/_c_src/tamp/decompressor.c @@ -329,7 +329,7 @@ static inline void refill_bit_buffer(TampDecompressor* d, const unsigned char** } } -#if defined(__GNUC__) && !defined(__clang__) +#if TAMP_HAS_GCC_OPTIMIZE #pragma GCC push_options #pragma GCC optimize("-fno-tree-pre") #endif @@ -511,7 +511,7 @@ tamp_res tamp_decompressor_decompress_cb(TampDecompressor* decompressor, unsigne } return TAMP_INPUT_EXHAUSTED; } -#if defined(__GNUC__) && !defined(__clang__) +#if 
TAMP_HAS_GCC_OPTIMIZE #pragma GCC pop_options #endif