--!optimize 2
--!native
--!strict

-- Base94: binary <-> text codec over a 94-symbol alphabet.
--
-- The alphabet is byte codes 32..127 with 34 (") and 92 (\) left out, so the
-- encoded text can sit inside a quoted string literal without escaping.
--
-- The packing scheme follows the basE91 approach, adapted to base 94: input
-- bytes are pushed into a little-endian bit accumulator and drained in groups
-- of 13 or 14 bits, each group being emitted as two symbols (value % 94 first,
-- value // 94 second). Since 94^2 = 8836 and 2^13 = 8192, a group whose low
-- 13 bits are below 8836 - 8192 = 644 can safely carry a 14th bit.

local buffer_len = buffer.len
local buffer_create = buffer.create
local buffer_readu8 = buffer.readu8
local buffer_writeu16 = buffer.writeu16
local buffer_writeu8 = buffer.writeu8
local buffer_copy = buffer.copy
local bit32_bor = bit32.bor
local bit32_lshift = bit32.lshift
local bit32_rshift = bit32.rshift
local bit32_band = bit32.band
local math_ceil = math.ceil

-- DIRECT_LOOKUP[v] : the two symbols for group value v (0..8835), packed as a
--                    u16 so one little-endian writeu16 emits both in order.
-- BIT_MASK[v]      : how many bits (13 or 14) group value v carries.
local DIRECT_LOOKUP: { [number]: number } = {}
local BIT_MASK: { [number]: number } = {}

-- ENCODE_MAP[digit] -> byte code, DECODE_MAP[byte code] -> digit (0..93).
local ENCODE_MAP: { [number]: number } = {}
local DECODE_MAP: { [number]: number } = {}

local Base94 = {}

local j = 0
for i = 32, 127 do
	if i ~= 34 and i ~= 92 then
		ENCODE_MAP[j] = i
		DECODE_MAP[i] = j
		j += 1
	end
end

for entry = 0, 8835 do
	-- Low byte is written first by writeu16, so it holds the first symbol.
	DIRECT_LOOKUP[entry] = bit32_bor(
		bit32_lshift(ENCODE_MAP[entry // 94], 8),
		ENCODE_MAP[entry % 94]
	)

	BIT_MASK[entry] = (bit32_band(entry, 8191) < 644) and 14 or 13
end

--[=[
	Encodes arbitrary bytes into Base94 text.

	@param input buffer -- raw bytes to encode
	@return buffer -- a new buffer holding exactly the encoded symbols
]=]
function Base94.encode(input: buffer): buffer
	local length = buffer_len(input)

	-- Worst case is 2 symbols per 13 input bits (~1.231 bytes per byte) plus
	-- up to 2 symbols for the tail, so 1.25 * length + 2 is always enough.
	local output = buffer_create(math_ceil(length * 1.25) + 2)

	local counter = 0 -- bit accumulator, least significant bits are oldest
	local bits = 0 -- number of valid bits currently held in `counter`
	local offset = 0 -- write position in `output`

	for i = 0, length - 1 do
		counter = bit32_bor(counter, bit32_lshift(buffer_readu8(input, i), bits))
		bits += 8

		if bits > 13 then
			local entry = bit32_band(counter, 8191)

			if entry < 644 then
				-- Small 13-bit value: there is room for a 14th bit.
				entry = bit32_band(counter, 16383)
				counter = bit32_rshift(counter, 14)
				bits -= 14
			else
				counter = bit32_rshift(counter, 13)
				bits -= 13
			end

			buffer_writeu16(output, offset, DIRECT_LOOKUP[entry])
			offset += 2
		end
	end

	-- Flush what is left: one symbol if it fits in a single digit and spans
	-- fewer than 8 bits, otherwise a full symbol pair.
	if bits > 0 then
		if bits > 7 or counter > 93 then
			buffer_writeu16(output, offset, DIRECT_LOOKUP[counter])
			offset += 2
		else
			buffer_writeu8(output, offset, ENCODE_MAP[counter])
			offset += 1
		end
	end

	local sliced = buffer_create(offset)
	buffer_copy(sliced, 0, output, 0, offset)

	return sliced
end

--[=[
	Decodes Base94 text produced by `Base94.encode` back into raw bytes.

	Raises an error if the input contains a byte outside the alphabet.

	@param Input buffer -- encoded symbols
	@return buffer -- a new buffer holding exactly the decoded bytes
]=]
function Base94.decode(Input: buffer): buffer
	local InputLength = buffer_len(Input)

	-- Every symbol pair yields at most 14 bits, so the decoded data is never
	-- longer than the encoded text.
	local output = buffer_create(InputLength)

	local offset = 0 -- write position in `output`
	local counter = 0 -- bit accumulator
	local bits = 0 -- number of valid bits currently held in `counter`
	local entry = -1 -- first digit of a pending pair, or -1 when none

	for i = 0, InputLength - 1 do
		local byte = buffer_readu8(Input, i)
		local val: number? = DECODE_MAP[byte]
		if val == nil then
			error(string.format("Base94.decode: invalid character (byte %d) at offset %d", byte, i), 2)
		end

		if entry == -1 then
			entry = val
		else
			entry += val * 94

			counter = bit32_bor(counter, bit32_lshift(entry, bits))
			bits += BIT_MASK[entry]

			-- `bits` is below 8 before each pair, so it is 13..21 here and a
			-- single 1- or 2-byte write always brings it back below 8.
			-- The buffer writes keep only the low 16 / 8 bits of `counter`.
			if bits >= 16 then
				buffer_writeu16(output, offset, counter)
				offset += 2
				counter = bit32_rshift(counter, 16)
				bits -= 16
			elseif bits >= 8 then
				buffer_writeu8(output, offset, counter)
				offset += 1
				counter = bit32_rshift(counter, 8)
				bits -= 8
			end

			entry = -1
		end
	end

	-- A lone trailing symbol supplies the high bits of the final byte. The
	-- byte must be written even when it is zero; otherwise inputs ending in
	-- 0x00 (e.g. {0x41, 0x00}) lose their last byte. With no buffered bits a
	-- single symbol cannot complete a byte, so it is ignored.
	if entry ~= -1 and bits > 0 then
		buffer_writeu8(output, offset, bit32_bor(counter, bit32_lshift(entry, bits)))
		offset += 1
	end

	local sliced = buffer_create(offset)
	buffer_copy(sliced, 0, output, 0, offset)

	return sliced
end

return Base94