diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..442087c --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +version: 2 +updates: + - package-ecosystem: "gomod" + directory: "/" + schedule: + interval: "weekly" + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..c32e446 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,48 @@ +name: "CodeQL" + +on: + push: + branches: [ "main" ] + pull_request: + +jobs: + analyze: + name: Analyze (${{ matrix.language }}) + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + permissions: + security-events: write + packages: read + actions: read + contents: read + + strategy: + fail-fast: false + matrix: + language: [ 'go' ] + + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Set up Go + uses: actions/setup-go@v6 + with: + cache: false + go-version-file: go.mod + if: ${{ matrix.language == 'go' }} + + - name: Initialize CodeQL + uses: github/codeql-action/init@v4 + with: + languages: ${{ matrix.language }} + timeout-minutes: 10 + + - name: Autobuild (${{ matrix.language }}) + uses: github/codeql-action/autobuild@v4 + timeout-minutes: 10 + + - name: Perform CodeQL Analysis (${{ matrix.language }}) + uses: github/codeql-action/analyze@v4 + with: + category: "/language:${{matrix.language}}" + timeout-minutes: 10 diff --git a/.github/workflows/tests-main.yml b/.github/workflows/tests-main.yml new file mode 100644 index 0000000..5e7d32a --- /dev/null +++ b/.github/workflows/tests-main.yml @@ -0,0 +1,13 @@ +name: Tests - Main Push + +on: + push: + branches: [ main ] + +permissions: + contents: read + +jobs: + call-reusable: + uses: ./.github/workflows/tests-workflow.yml + diff --git a/.github/workflows/tests-pr.yml b/.github/workflows/tests-pr.yml new file mode 100644 index 0000000..46d2f0f --- /dev/null +++ b/.github/workflows/tests-pr.yml @@ -0,0 +1,12 @@ +name: Tests - Pull Request + +on: + pull_request: + +permissions: + contents: read + +jobs: + call-reusable: + uses: ./.github/workflows/tests-workflow.yml + diff --git a/.github/workflows/tests-workflow.yml b/.github/workflows/tests-workflow.yml new file mode 100644 index 0000000..04a8a5e --- /dev/null +++ b/.github/workflows/tests-workflow.yml @@ -0,0 +1,58 @@ +name: Tests + +on: + workflow_call: + +permissions: + contents: read + +jobs: + lint-and-tidy: + name: Verify Linting + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + - name: Set up Go + uses: actions/setup-go@v6 + with: + go-version: "1.25" + + - name: Set up golangci-lint + run: | + curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/v2.7.1/install.sh | sh -s -- -b $(go env GOPATH)/bin v2.7.1 + + - name: Run lint check + run: make lint + + - name: Run tidy check + run: | + go mod tidy + # Fail if go.mod or go.sum changed + git diff --exit-code go.mod go.sum + + test: + name: Verify Unit Tests + runs-on: ubuntu-latest + strategy: + matrix: + go: + - '1.25' + - '1.24' + - '1.23' + - '1.22' + - '1.21' + - '1.20' + - '1.19' + - '1.18' + steps: + - uses: actions/checkout@v6 + + - name: Set up Go ${{ matrix.go }} + uses: actions/setup-go@v6 + with: + go-version: ${{ matrix.go }} + + - name: Run unit tests + run: make test + diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..23b0d5c --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,50 @@ +version: 2 + +linters: + enable: + - asasalint + - asciicheck + - bidichk + - containedctx + - contextcheck + - decorder + - durationcheck + - errcheck + - errorlint + - exptostd + - fatcontext + - forbidigo + - gocheckcompilerdirectives + - gochecksumtype + - goconst + - godoclint + - gosmopolitan + - grouper + - iface + - importas + - mirror + - misspell + - musttag + - nilerr + - nilnil + - perfsprint + - prealloc + - reassign + - sloglint + - testifylint + - thelper + - unconvert + - wastedassign + - whitespace + settings: + goconst: + min-len: 4 + min-occurrences: 4 + +formatters: + enable: + - gofmt + - goimports + +run: + timeout: 600s diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..4bd4a9e --- /dev/null +++ b/Makefile @@ -0,0 +1,25 @@ +export GO111MODULE=on + +.PHONY: default test test-cover test-fuzz bench lint + + +test: + go test -race -cover ./... + +test-cover: + go test -race -coverprofile=test.out ./... && go tool cover --html=test.out + +test-fuzz: + go test -fuzz='^FuzzEncode$$' -fuzztime=2m ./base85/... + go test -fuzz='^FuzzDecode$$' -fuzztime=2m ./base85/... + go test -fuzz='^FuzzEncodeWithPadding$$' -fuzztime=2m ./base85/... + go test -fuzz='^FuzzDecodeWithPadding$$' -fuzztime=2m ./base85/... + go test -fuzz='^FuzzStreamRoundTrip$$' -fuzztime=2m ./base85/... + go test -fuzz='^FuzzStreamRoundTripWithPadding$$' -fuzztime=2m ./base85/... + +bench: + go test --benchmem -benchtime=10s -bench='Benchmark.*' -run='^$$' ./... + +lint: + golangci-lint run --timeout=600s && go vet ./... + diff --git a/README.md b/README.md index e0b7327..35c99ee 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,53 @@ # encoding -Encoding utilities for data processing and storage + +Lightweight encoding packages for niche use cases not covered by Go's standard library. These packages are useful when you need compact encodings with specific character set requirements. + +The API closely mirrors Go's `encoding/base64` and related packages, making it easy to transition to standard implementations if they become available. Packages will be deprecated and removed if equivalent functionality is added to the standard library. + +## Installation + +```bash +go get github.com/go-analyze/encoding@latest +``` + +## base85 + +The standard library's `encoding/ascii85` package only supports the Adobe/btoa variant with a fixed alphabet. This package provides base85 encoding with support for custom alphabets via `NewEncoding()`, following the same pattern as `encoding/base64`. + +### RFC1924 + +RFC1924 defines a base85 encoding designed for compact representation of IPv6 addresses. It uses an 85-character alphabet consisting of `0-9`, `A-Z`, `a-z`, and 23 punctuation symbols, deliberately excluding characters that could cause parsing issues in various contexts (quotes, comma, period, slash, colon, brackets, and backslash). + +```go +package main + +import ( + "fmt" + + "github.com/go-analyze/encoding/base85" +) + +func main() { + data := []byte("Hello, World!") + + // Encode + encoded := base85.RFC1924.EncodeToString(data) + fmt.Println(encoded) // NM&qnZ!92JZ*pv8Ap + + // Decode + decoded, err := base85.RFC1924.DecodeString(encoded) + if err != nil { + panic(err) + } + fmt.Println(string(decoded)) // Hello, World! +} +``` + +### Custom Alphabets + +Create encodings with custom 85-character alphabets: + +```go +enc := base85.NewEncoding("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~") +encoded := enc.EncodeToString(data) +``` diff --git a/base85/bench_test.go b/base85/bench_test.go new file mode 100644 index 0000000..58c6c57 --- /dev/null +++ b/base85/bench_test.go @@ -0,0 +1,49 @@ +package base85 + +import ( + "encoding/ascii85" + "testing" +) + +var benchData = []byte("The quick brown fox jumps over the lazy dog. 0123456789!@#$%^&*()") + +func BenchmarkEncodeBase85(b *testing.B) { + dst := make([]byte, RFC1924.EncodedLen(len(benchData))) + b.ResetTimer() + + for i := 0; i < b.N; i++ { + RFC1924.Encode(dst, benchData) + } +} + +func BenchmarkDecodeBase85(b *testing.B) { + encoded := RFC1924.EncodeToString(benchData) + src := []byte(encoded) + dst := make([]byte, RFC1924.DecodedLen(len(src))) + b.ResetTimer() + + for i := 0; i < b.N; i++ { + _, _ = RFC1924.Decode(dst, src) + } +} + +func BenchmarkEncodeAscii85(b *testing.B) { + dst := make([]byte, ascii85.MaxEncodedLen(len(benchData))) + b.ResetTimer() + + for i := 0; i < b.N; i++ { + ascii85.Encode(dst, benchData) + } +} + +func BenchmarkDecodeAscii85(b *testing.B) { + src := make([]byte, ascii85.MaxEncodedLen(len(benchData))) + n := ascii85.Encode(src, benchData) + src = src[:n] + dst := make([]byte, len(benchData)) + b.ResetTimer() + + for i := 0; i < b.N; i++ { + _, _, _ = ascii85.Decode(dst, src, true) + } +} diff --git a/base85/decode_test.go b/base85/decode_test.go new file mode 100644 index 0000000..887528c --- /dev/null +++ b/base85/decode_test.go @@ -0,0 +1,233 @@ +package base85 + +import ( + "bytes" + "encoding/ascii85" + "io" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDecode(t *testing.T) { + t.Parallel() + + // test cases avoid 4 consecutive zero bytes (which ascii85.Encode outputs as 'z') + tests := []struct { + name string + input []byte + }{ + {"empty", []byte{}}, + {"single_byte", []byte{0x01}}, + {"two_bytes", []byte{0x01, 0x02}}, + {"three_bytes", []byte{0x01, 0x02, 0x03}}, + {"four_bytes", []byte{0x01, 0x02, 0x03, 0x04}}, + {"five_bytes", []byte{0x01, 0x02, 0x03, 0x04, 0x05}}, + {"eight_bytes", []byte{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}}, + {"hello_world", []byte("Hello, World!")}, + {"binary_data", []byte{0xFF, 0xFE, 0xFD, 0xFC, 0xFB, 0xFA}}, + {"high_values", []byte{0xFF, 0xFF, 0xFF, 0xFF}}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // encode with standard ascii85 + encoded := make([]byte, ascii85.MaxEncodedLen(len(tc.input))) + n := ascii85.Encode(encoded, tc.input) + encoded = encoded[:n] + + // decode with our implementation + dst := make([]byte, ascii85Encoding.DecodedLen(len(encoded))) + ndst, err := ascii85Encoding.Decode(dst, encoded) + + require.NoError(t, err) + assert.Equal(t, tc.input, dst[:ndst]) + }) + } +} + +func TestDecodeString(t *testing.T) { + t.Parallel() + + // bytes for "日本語" without triggering gosmopolitan + unicodeBytes := []byte{0xe6, 0x97, 0xa5, 0xe6, 0x9c, 0xac, 0xe8, 0xaa, 0x9e} + + tests := []struct { + name string + input []byte + }{ + {"empty", []byte{}}, + {"simple_text", []byte("test")}, + {"unicode", unicodeBytes}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // encode with standard ascii85 + encoded := make([]byte, ascii85.MaxEncodedLen(len(tc.input))) + n := ascii85.Encode(encoded, tc.input) + encodedStr := string(encoded[:n]) + + // decode with our implementation + decoded, err := ascii85Encoding.DecodeString(encodedStr) + + require.NoError(t, err) + assert.Equal(t, tc.input, decoded) + }) + } +} + +func TestAppendDecode(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + prefix []byte + input []byte + }{ + {"empty_prefix", nil, []byte("test")}, + {"with_prefix", []byte("decoded:"), []byte("data")}, + {"empty_input", []byte("prefix:"), []byte{}}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // encode with standard ascii85 + encoded := make([]byte, ascii85.MaxEncodedLen(len(tc.input))) + n := ascii85.Encode(encoded, tc.input) + encoded = encoded[:n] + + // decode with our implementation + result, err := ascii85Encoding.AppendDecode(tc.prefix, encoded) + + require.NoError(t, err) + + // verify prefix is preserved + if tc.prefix != nil { + assert.Equal(t, string(tc.prefix), string(result[:len(tc.prefix)])) + } + + // verify decoded content + decodedPortion := result[len(tc.prefix):] + assert.Equal(t, tc.input, decodedPortion) + }) + } +} + +func TestDecodeCorruptInput(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + }{ + {"invalid_char", "hello\x00world"}, + {"out_of_alphabet", "~~~~~"}, + {"single_trailing_char", "AAAAA0"}, // 5 valid chars + 1 trailing (invalid) + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + _, err := ascii85Encoding.DecodeString(tc.input) + require.Error(t, err) + + var corruptErr CorruptInputError + assert.ErrorAs(t, err, &corruptErr) + }) + } +} + +func TestDecodeWhitespace(t *testing.T) { + t.Parallel() + + input := []byte("test") + + // encode with standard ascii85 + encoded := make([]byte, ascii85.MaxEncodedLen(len(input))) + n := ascii85.Encode(encoded, input) + encodedStr := string(encoded[:n]) + + // insert whitespace + withWhitespace := encodedStr[:2] + " \t\n\r" + encodedStr[2:] + + // decode should ignore whitespace + decoded, err := ascii85Encoding.DecodeString(withWhitespace) + + require.NoError(t, err) + assert.Equal(t, input, decoded) +} + +func TestNewDecoder(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input []byte + readSize int // buffer size for Read calls, 0 means read all at once + }{ + {"empty", []byte{}, 0}, + {"single_byte", []byte{0x01}, 0}, + {"four_bytes", []byte{0x01, 0x02, 0x03, 0x04}, 0}, + {"five_bytes", []byte{0x01, 0x02, 0x03, 0x04, 0x05}, 0}, + {"hello_world", []byte("Hello, World!"), 0}, + {"small_reads", []byte("Hello, World!"), 3}, + {"byte_by_byte", []byte("test"), 1}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // encode with standard ascii85 + encoded := make([]byte, ascii85.MaxEncodedLen(len(tc.input))) + n := ascii85.Encode(encoded, tc.input) + encoded = encoded[:n] + + // decode with our stream decoder + dec := NewDecoder(ascii85Encoding, bytes.NewReader(encoded)) + + var decoded []byte + if tc.readSize == 0 { + // read all at once + var err error + decoded, err = io.ReadAll(dec) + require.NoError(t, err) + } else { + // read in chunks + buf := make([]byte, tc.readSize) + for { + nr, err := dec.Read(buf) + if nr > 0 { + decoded = append(decoded, buf[:nr]...) + } + if err == io.EOF { + break + } + require.NoError(t, err) + } + } + + assert.Equal(t, tc.input, decoded) + }) + } +} + +func TestNewDecoderWithWhitespace(t *testing.T) { + t.Parallel() + + input := []byte("Hello, World!") + + // encode with standard ascii85 + encoded := make([]byte, ascii85.MaxEncodedLen(len(input))) + n := ascii85.Encode(encoded, input) + encodedStr := string(encoded[:n]) + + // insert whitespace + withWhitespace := encodedStr[:5] + "\n\t " + encodedStr[5:] + + // decode with stream decoder + dec := NewDecoder(ascii85Encoding, bytes.NewReader([]byte(withWhitespace))) + decoded, err := io.ReadAll(dec) + + require.NoError(t, err) + assert.Equal(t, input, decoded) +} diff --git a/base85/encode_test.go b/base85/encode_test.go new file mode 100644 index 0000000..5cc211c --- /dev/null +++ b/base85/encode_test.go @@ -0,0 +1,142 @@ +package base85 + +import ( + "bytes" + "encoding/ascii85" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// ASCII85 alphabet for testing against standard library +// Characters '!' (33) through 'u' (117) +const ascii85Alphabet = "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstu" + +var ascii85Encoding = NewEncoding(ascii85Alphabet) + +func TestEncodeToString(t *testing.T) { + t.Parallel() + + // bytes for "日本語" without triggering gosmopolitan + unicodeBytes := []byte{0xe6, 0x97, 0xa5, 0xe6, 0x9c, 0xac, 0xe8, 0xaa, 0x9e} + + tests := []struct { + name string + input []byte + }{ + {"empty", []byte{}}, + {"single_byte", []byte{0x01}}, + {"two_bytes", []byte{0x01, 0x02}}, + {"three_bytes", []byte{0x01, 0x02, 0x03}}, + {"four_bytes", []byte{0x01, 0x02, 0x03, 0x04}}, + {"five_bytes", []byte{0x01, 0x02, 0x03, 0x04, 0x05}}, + {"eight_bytes", []byte{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}}, + {"hello_world", []byte("Hello, World!")}, + {"binary_data", []byte{0xFF, 0xFE, 0xFD, 0xFC, 0xFB, 0xFA}}, + {"high_values", []byte{0xFF, 0xFF, 0xFF, 0xFF}}, + {"mixed_content", []byte("Test\x00\x01\x02data")}, + {"unicode", unicodeBytes}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + encoded := ascii85Encoding.EncodeToString(tc.input) + + // verify via standard library decode + dst := make([]byte, len(tc.input)+4) + ndst, _, err := ascii85.Decode(dst, []byte(encoded), true) + + require.NoError(t, err) + assert.Equal(t, tc.input, dst[:ndst]) + }) + } +} + +func TestAppendEncode(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + prefix []byte + input []byte + }{ + {"empty_prefix", nil, []byte("test")}, + {"with_prefix", []byte("prefix:"), []byte("data")}, + {"empty_input", []byte("prefix:"), []byte{}}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + result := ascii85Encoding.AppendEncode(tc.prefix, tc.input) + + // verify prefix is preserved + if tc.prefix != nil { + assert.Equal(t, string(tc.prefix), string(result[:len(tc.prefix)])) + } + + // extract encoded portion and decode + encodedPortion := result[len(tc.prefix):] + dst := make([]byte, len(tc.input)+4) + ndst, _, err := ascii85.Decode(dst, encodedPortion, true) + + require.NoError(t, err) + assert.Equal(t, tc.input, dst[:ndst]) + }) + } +} + +func TestNewEncoder(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input []byte + writes []int // chunk sizes for multiple writes + }{ + {"empty", []byte{}, nil}, + {"single_byte", []byte{0x01}, nil}, + {"four_bytes", []byte{0x01, 0x02, 0x03, 0x04}, nil}, + {"five_bytes", []byte{0x01, 0x02, 0x03, 0x04, 0x05}, nil}, + {"hello_world", []byte("Hello, World!"), nil}, + {"chunked_write", []byte("Hello, World!"), []int{3, 5, 5}}, + {"byte_by_byte", []byte("test"), []int{1, 1, 1, 1}}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + var buf bytes.Buffer + enc := NewEncoder(ascii85Encoding, &buf) + + if tc.writes == nil { + // single write + n, err := enc.Write(tc.input) + require.NoError(t, err) + assert.Equal(t, len(tc.input), n) + } else { + // multiple writes + offset := 0 + for _, size := range tc.writes { + end := offset + size + if end > len(tc.input) { + end = len(tc.input) + } + n, err := enc.Write(tc.input[offset:end]) + require.NoError(t, err) + assert.Equal(t, end-offset, n) + offset = end + } + } + + require.NoError(t, enc.Close()) + + // verify via standard library decode + encoded := buf.Bytes() + dst := make([]byte, len(tc.input)+4) + ndst, _, err := ascii85.Decode(dst, encoded, true) + + require.NoError(t, err) + assert.Equal(t, tc.input, dst[:ndst]) + }) + } +} diff --git a/base85/encoding.go b/base85/encoding.go new file mode 100644 index 0000000..08cdc26 --- /dev/null +++ b/base85/encoding.go @@ -0,0 +1,601 @@ +package base85 + +import ( + "io" + "runtime" + "strconv" +) + +const ( + // NoPadding is used with WithPadding to disable padding. This is the default for base85 encodings. + NoPadding rune = -1 + + alphabetSize = 85 + + encodeRFC1924 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~" + + // powers of 85 for decoding + pow85_1 = 85 + pow85_2 = 85 * 85 + pow85_3 = 85 * 85 * 85 + pow85_4 = 85 * 85 * 85 * 85 +) + +// RFC1924 is the encoding defined in RFC 1924 for compact representation of +// IPv6 addresses. It uses characters 0-9, A-Z, a-z, and 23 punctuation symbols, +// with no padding by default. +var RFC1924 = NewEncoding(encodeRFC1924) + +// Encoding represents a base85 encoding/decoding scheme defined by an 85-character alphabet. +type Encoding struct { + encode [alphabetSize]byte + decodeMap [256]uint8 + padChar rune +} + +// NewEncoding returns a new Encoding defined by the given 85-character +// alphabet, which must contain only unique ASCII characters and must not +// contain newline characters ('\r', '\n'). The resulting Encoding uses no +// padding by default. +func NewEncoding(encoder string) *Encoding { + if len(encoder) != alphabetSize { + panic("base85: encoding alphabet must be 85 bytes long") + } + + e := &Encoding{padChar: NoPadding} + copy(e.encode[:], encoder) + + for i := range e.decodeMap { + e.decodeMap[i] = 0xFF + } + + for i, c := range encoder { + if c > 127 { + panic("base85: encoding alphabet must contain only ASCII characters") + } else if c == '\n' || c == '\r' { + panic("base85: encoding alphabet contains newline character") + } else if e.decodeMap[c] != 0xFF { + panic("base85: encoding alphabet contains duplicate character") + } + e.decodeMap[c] = uint8(i) + } + + return e +} + +// WithPadding creates a new Encoding identical to enc except with a specified +// padding character, or NoPadding to disable padding. The padding character +// must be an ASCII character, must not be '\r' or '\n', and must not be +// contained in the encoding alphabet. +func (enc Encoding) WithPadding(padding rune) *Encoding { + if padding == '\r' || padding == '\n' { + panic("base85: invalid padding character") + } + + if padding != NoPadding { + if padding > 127 { + panic("base85: padding character must be ASCII") + } + for _, c := range enc.encode { + if rune(c) == padding { + panic("base85: padding character is in alphabet") + } + } + } + + enc.padChar = padding + return &enc +} + +// EncodedLen returns the length in bytes of the base85 encoding of an input buffer of length n. +func (enc *Encoding) EncodedLen(n int) int { + if enc.padChar == NoPadding { + // 4 bytes -> 5 chars, partial blocks: 1->2, 2->3, 3->4 + fullBlocks := n / 4 + remainder := n % 4 + length := fullBlocks * 5 + if remainder > 0 { + length += remainder + 1 + } + return length + } + // with padding: always multiple of 5 + return (n + 3) / 4 * 5 +} + +// DecodedLen returns the maximum length in bytes of the decoded data corresponding to n bytes of base85-encoded data. +func (enc *Encoding) DecodedLen(n int) int { + // 5 chars -> 4 bytes, partial: 2->1, 3->2, 4->3 + fullBlocks := n / 5 + remainder := n % 5 + length := fullBlocks * 4 + if remainder > 1 { + length += remainder - 1 + } + return length +} + +// Encode encodes src using the encoding enc, writing EncodedLen(len(src)) bytes to dst. +func (enc *Encoding) Encode(dst, src []byte) { + if len(src) == 0 { + return + } + + di := 0 + for len(src) >= 4 { + // big-endian uint32 + val := uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3]) + + dst[di+4] = enc.encode[val%85] + val /= 85 + dst[di+3] = enc.encode[val%85] + val /= 85 + dst[di+2] = enc.encode[val%85] + val /= 85 + dst[di+1] = enc.encode[val%85] + val /= 85 + dst[di] = enc.encode[val%85] + + src = src[4:] + di += 5 + } + + // handle remaining bytes + if len(src) > 0 { + var val uint32 + switch len(src) { + case 3: + val = uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 + case 2: + val = uint32(src[0])<<24 | uint32(src[1])<<16 + case 1: + val = uint32(src[0]) << 24 + } + + // encode and output only needed characters + var buf [5]byte + buf[4] = enc.encode[val%85] + val /= 85 + buf[3] = enc.encode[val%85] + val /= 85 + buf[2] = enc.encode[val%85] + val /= 85 + buf[1] = enc.encode[val%85] + val /= 85 + buf[0] = enc.encode[val%85] + + // output chars: 1 byte -> 2 chars, 2 bytes -> 3 chars, 3 bytes -> 4 chars + outLen := len(src) + 1 + copy(dst[di:], buf[:outLen]) + di += outLen + + // add padding if needed + if enc.padChar != NoPadding { + for i := outLen; i < 5; i++ { + dst[di] = byte(enc.padChar) + di++ + } + } + } +} + +// EncodeToString returns the base85 encoding of src. +func (enc *Encoding) EncodeToString(src []byte) string { + buf := make([]byte, enc.EncodedLen(len(src))) + enc.Encode(buf, src) + return string(buf) +} + +// AppendEncode appends the base85 encoding of src to dst and returns the extended buffer. +func (enc *Encoding) AppendEncode(dst, src []byte) []byte { + n := enc.EncodedLen(len(src)) + dst = append(dst, make([]byte, n)...) + enc.Encode(dst[len(dst)-n:], src) + return dst +} + +// Decode decodes src using the encoding enc. It writes at most DecodedLen(len(src)) bytes +// to dst and returns the number of bytes written. Whitespace (space, tab, CR, LF) is ignored +// unless included in the encoding alphabet. If src contains invalid base85 data, it will +// return the number of bytes successfully written and CorruptInputError. +func (enc *Encoding) Decode(dst, src []byte) (n int, err error) { + if len(src) == 0 { + return 0, nil + } + + var nb int + var digits [5]uint32 + hasPadding := enc.padChar != NoPadding + padCount := 0 // tracks padding chars seen in current block + + for i := 0; i < len(src); i++ { + c := src[i] + + // skip whitespace if not in alphabet + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') && enc.decodeMap[c] == 0xFF { + continue + } + + // padding handling + if hasPadding && rune(c) == enc.padChar { + if nb < 2 { + return n, CorruptInputError(i) + } + padCount++ + if nb+padCount == 5 { + // block complete - decode and reset + n += decodePartial(dst[n:], digits[:], nb) + nb = 0 + padCount = 0 + } + continue + } + + // data char after padding started is an error + if padCount > 0 { + return n, CorruptInputError(i) + } + + d := enc.decodeMap[c] + if d == 0xFF { + return n, CorruptInputError(i) + } + + digits[nb] = uint32(d) + nb++ + + if nb == 5 { + val := digits[0]*pow85_4 + digits[1]*pow85_3 + digits[2]*pow85_2 + digits[3]*pow85_1 + digits[4] + dst[n] = byte(val >> 24) + dst[n+1] = byte(val >> 16) + dst[n+2] = byte(val >> 8) + dst[n+3] = byte(val) + n += 4 + nb = 0 + } + } + + // handle remaining digits (unpadded case) + if nb > 0 { + if nb == 1 || padCount > 0 { + return n, CorruptInputError(len(src)) + } + n += decodePartial(dst[n:], digits[:], nb) + } + + return n, nil +} + +// decodePartial decodes 2-4 accumulated digit values into output bytes. +func decodePartial(dst []byte, digits []uint32, nb int) int { + // fill remaining with 84 (highest digit) for implicit padding + for i := nb; i < 5; i++ { + digits[i] = 84 + } + val := digits[0]*pow85_4 + digits[1]*pow85_3 + digits[2]*pow85_2 + digits[3]*pow85_1 + digits[4] + for i := 0; i < nb-1; i++ { + dst[i] = byte(val >> 24) + val <<= 8 + } + return nb - 1 +} + +// decodeBlock decodes 2-5 base85 alphabet bytes into 1-4 output bytes. +// Returns the number of bytes written to dst. Caller must ensure all bytes +// in src are valid alphabet characters (not padding, not invalid). +func (enc *Encoding) decodeBlock(dst, src []byte) int { + // initialize with highest alphabet index (84) for implicit padding + d0, d1, d2, d3, d4 := uint32(84), uint32(84), uint32(84), uint32(84), uint32(84) + + // map input bytes to digit values + switch len(src) { + case 5: + d4 = uint32(enc.decodeMap[src[4]]) + fallthrough + case 4: + d3 = uint32(enc.decodeMap[src[3]]) + fallthrough + case 3: + d2 = uint32(enc.decodeMap[src[2]]) + fallthrough + case 2: + d1 = uint32(enc.decodeMap[src[1]]) + d0 = uint32(enc.decodeMap[src[0]]) + } + + val := d0*pow85_4 + d1*pow85_3 + d2*pow85_2 + d3*pow85_1 + d4 + + // output length: 5 chars -> 4 bytes, otherwise len-1 + // only write the bytes we actually produce + switch len(src) { + case 5: + dst[3] = byte(val) + fallthrough + case 4: + dst[2] = byte(val >> 8) + fallthrough + case 3: + dst[1] = byte(val >> 16) + fallthrough + case 2: + dst[0] = byte(val >> 24) + } + + if len(src) == 5 { + return 4 + } + return len(src) - 1 +} + +// decodeFiltered decodes pre-validated and filtered input (whitespace removed, +// but padding chars still present if padding is enabled). +func (enc *Encoding) decodeFiltered(dst, src []byte) (n int, err error) { + if len(src) == 0 { + return 0, nil + } + + consumed := 0 + for len(src) >= 5 { + // find data length in this block (may end early due to padding) + dataLen := 5 + if enc.padChar != NoPadding { + for i := 0; i < 5; i++ { + if rune(src[i]) == enc.padChar { + dataLen = i + break + } + } + // validate: all chars after first padding must also be padding + for i := dataLen; i < 5; i++ { + if rune(src[i]) != enc.padChar { + return n, CorruptInputError(consumed + i) + } + } + // need at least 2 data chars to produce 1 byte + if dataLen < 2 { + return n, CorruptInputError(consumed + dataLen) + } + } + + // validate all data chars are in alphabet + for i := 0; i < dataLen; i++ { + if enc.decodeMap[src[i]] == 0xFF { + return n, CorruptInputError(consumed + i) + } + } + + n += enc.decodeBlock(dst[n:], src[:dataLen]) + src = src[5:] + consumed += 5 + } + + // handle remaining 1-4 chars (only valid when padding is disabled or no padding present) + if len(src) > 0 { + if len(src) == 1 { + return n, CorruptInputError(consumed) + } + // remaining chars cannot contain padding + for i, c := range src { + if enc.padChar != NoPadding && rune(c) == enc.padChar { + return n, CorruptInputError(consumed + i) + } + if enc.decodeMap[c] == 0xFF { + return n, CorruptInputError(consumed + i) + } + } + n += enc.decodeBlock(dst[n:], src) + } + + return n, nil +} + +// DecodeString returns the bytes represented by the base85 string s. +func (enc *Encoding) DecodeString(s string) ([]byte, error) { + dst := make([]byte, enc.DecodedLen(len(s))) + n, err := enc.Decode(dst, []byte(s)) + return dst[:n], err +} + +// AppendDecode appends the base85 decoding of src to dst and returns the extended buffer. +// If the input is malformed, it returns the partially decoded src and an error. +func (enc *Encoding) AppendDecode(dst, src []byte) ([]byte, error) { + n := enc.DecodedLen(len(src)) + dst = append(dst, make([]byte, n)...) + written, err := enc.Decode(dst[len(dst)-n:], src) + return dst[:len(dst)-n+written], err +} + +// NewEncoder returns a new base85 stream encoder. Data written to the +// returned writer will be encoded using enc and then written to w. +// Base85 encodings operate in 4-byte blocks; when finished writing, +// the caller must Close the returned encoder to flush any partially written blocks. +func NewEncoder(enc *Encoding, w io.Writer) io.WriteCloser { + return &encoder{enc: enc, w: w} +} + +type encoder struct { + enc *Encoding + w io.Writer + buf [4]byte + nbuf int + outBuf [5]byte + err error +} + +func (e *encoder) Write(p []byte) (n int, err error) { + if e.err != nil { + return 0, e.err + } + + // use buffered data first + if e.nbuf > 0 { + for len(p) > 0 && e.nbuf < 4 { + e.buf[e.nbuf] = p[0] + e.nbuf++ + p = p[1:] + n++ + } + if e.nbuf == 4 { + e.enc.Encode(e.outBuf[:], e.buf[:]) + if _, e.err = e.w.Write(e.outBuf[:]); e.err != nil { + return n, e.err + } + e.nbuf = 0 + } + } + + // encode full blocks + for len(p) >= 4 { + e.enc.Encode(e.outBuf[:], p[:4]) + if _, e.err = e.w.Write(e.outBuf[:]); e.err != nil { + return n, e.err + } + p = p[4:] + n += 4 + } + + // buffer remaining + for len(p) > 0 { + e.buf[e.nbuf] = p[0] + e.nbuf++ + p = p[1:] + n++ + } + + return n, nil +} + +func (e *encoder) Close() error { + if e.err != nil { + return e.err + } + + if e.nbuf > 0 { + encoded := make([]byte, e.enc.EncodedLen(e.nbuf)) + e.enc.Encode(encoded, e.buf[:e.nbuf]) + if _, e.err = e.w.Write(encoded); e.err != nil { + return e.err + } + } + + return nil +} + +// NewDecoder constructs a new base85 stream decoder. Data read from the returned reader will be decoded using enc. +func NewDecoder(enc *Encoding, r io.Reader) io.Reader { + return &decoder{enc: enc, r: r} +} + +type decoder struct { + enc *Encoding + r io.Reader + readBuf [1024]byte + encBuf [4]byte // buffer for incomplete encoded blocks (max 4 chars waiting for 5th) + nenc int // number of valid bytes in encBuf + outBuf []byte // buffered decoded output + err error + eof bool +} + +func (d *decoder) Read(p []byte) (n int, err error) { + // return buffered decoded data first + if len(d.outBuf) > 0 { + n = copy(p, d.outBuf) + d.outBuf = d.outBuf[n:] + return n, nil + } else if d.err != nil { + return 0, d.err + } else if d.eof { + return 0, io.EOF + } + + // loop until we have data to return or hit EOF/error + for { + // read more encoded data + nr, readErr := d.r.Read(d.readBuf[:]) + if readErr != nil && readErr != io.EOF { + // store error but process any data that was read + d.err = readErr + d.eof = true // treat terminal error as end of stream for decoding + if nr == 0 { + return 0, readErr + } + } else if readErr == io.EOF { + d.eof = true + } + if nr == 0 && !d.eof && d.err == nil { + // reader returned (0, nil), yield and retry + runtime.Gosched() + continue + } + + // filter whitespace and padding, combine with buffered encoded data + filtered := make([]byte, 0, d.nenc+nr) + filtered = append(filtered, d.encBuf[:d.nenc]...) + d.nenc = 0 + for i, c := range d.readBuf[:nr] { + if d.enc.padChar != NoPadding && rune(c) == d.enc.padChar { + filtered = append(filtered, c) + continue + } else if (c == ' ' || c == '\t' || c == '\n' || c == '\r') && d.enc.decodeMap[c] == 0xFF { + continue + } else if d.enc.decodeMap[c] == 0xFF { + d.err = CorruptInputError(i) + return 0, d.err + } + filtered = append(filtered, c) + } + + // if not at EOF, buffer incomplete block for next read + if !d.eof { + remainder := len(filtered) % 5 + if remainder > 0 { + d.nenc = copy(d.encBuf[:], filtered[len(filtered)-remainder:]) + filtered = filtered[:len(filtered)-remainder] + } + } + + if len(filtered) == 0 { + if d.eof { + if d.err == nil { + d.err = io.EOF + } + return 0, d.err + } else if d.err != nil { + return 0, d.err + } + continue // need more data + } + + // decode the filtered data + decoded := make([]byte, d.enc.DecodedLen(len(filtered))) + nd, decErr := d.enc.decodeFiltered(decoded, filtered) + if decErr != nil { + d.err = decErr + // still return what we decoded + n = copy(p, decoded[:nd]) + if n < nd { + d.outBuf = decoded[n:nd] + } + if n > 0 { + return n, nil // defer error until buffer drained + } + return 0, d.err + } + + // copy to output + n = copy(p, decoded[:nd]) + if n < nd { + d.outBuf = decoded[n:nd] + } + return n, nil + } +} + +// CorruptInputError is returned by Decode when the input contains invalid base85 data. +// The integer value represents the byte offset where the error was detected. +type CorruptInputError int64 + +func (e CorruptInputError) Error() string { + return "base85: illegal character at offset " + strconv.FormatInt(int64(e), 10) +} diff --git a/base85/encoding_test.go b/base85/encoding_test.go new file mode 100644 index 0000000..af774c2 --- /dev/null +++ b/base85/encoding_test.go @@ -0,0 +1,1133 @@ +package base85 + +import ( + "bytes" + "io" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestRFC1924RoundTrip(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input []byte + }{ + {"empty", []byte{}}, + {"single_byte", []byte{0x00}}, + {"single_nonzero", []byte{0x42}}, + {"two_bytes", []byte{0x01, 0x02}}, + {"three_bytes", []byte{0x01, 0x02, 0x03}}, + {"four_bytes", []byte{0x01, 0x02, 0x03, 0x04}}, + {"five_bytes", []byte{0x01, 0x02, 0x03, 0x04, 0x05}}, + {"eight_bytes", []byte{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}}, + {"hello_world", []byte("Hello, World!")}, + {"all_zeros", []byte{0x00, 0x00, 0x00, 0x00}}, + {"all_ones", []byte{0xFF, 0xFF, 0xFF, 0xFF}}, + {"binary_sequence", []byte{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}}, + {"unicode_text", []byte{0xe6, 0x97, 0xa5, 0xe6, 0x9c, 0xac, 0xe8, 0xaa, 0x9e}}, + {"mixed_content", []byte("Test\x00\x01\x02\xFF\xFEdata")}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + encoded := RFC1924.EncodeToString(tc.input) + decoded, err := RFC1924.DecodeString(encoded) + + require.NoError(t, err) + assert.Equal(t, tc.input, decoded) + }) + } +} + +func TestRFC1924RoundTripWithPadding(t *testing.T) { + t.Parallel() + + // use '.' since '=' is in the RFC1924 alphabet + paddedRFC1924 := RFC1924.WithPadding('.') + + tests := []struct { + name string + input []byte + }{ + {"empty", []byte{}}, + {"single_byte", []byte{0x42}}, + {"two_bytes", []byte{0x01, 0x02}}, + {"three_bytes", []byte{0x01, 0x02, 0x03}}, + {"four_bytes", []byte{0x01, 0x02, 0x03, 0x04}}, + {"five_bytes", []byte{0x01, 0x02, 0x03, 0x04, 0x05}}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + encoded := paddedRFC1924.EncodeToString(tc.input) + + // padded output length is always a multiple of 5 + if len(tc.input) > 0 { + assert.Equal(t, 0, len(encoded)%5) + } + + decoded, err := paddedRFC1924.DecodeString(encoded) + + require.NoError(t, err) + assert.Equal(t, tc.input, decoded) + }) + } +} + +func TestNewEncoding(t *testing.T) { + t.Parallel() + + t.Run("valid_alphabet", func(t *testing.T) { + enc := NewEncoding(encodeRFC1924) + require.NotNil(t, enc) + assert.Equal(t, NoPadding, enc.padChar) + }) + + t.Run("wrong_length", func(t *testing.T) { + assert.Panics(t, func() { NewEncoding("short") }) + }) + + t.Run("duplicate_char", func(t *testing.T) { + // 85 chars with duplicate + alphabet := "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}0" + assert.Panics(t, func() { NewEncoding(alphabet) }) + }) + + t.Run("contains_newline", func(t *testing.T) { + alphabet := "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}\n" + assert.Panics(t, func() { NewEncoding(alphabet) }) + }) + + t.Run("contains_cr", func(t *testing.T) { + alphabet := "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}\r" + assert.Panics(t, func() { NewEncoding(alphabet) }) + }) + + t.Run("contains_non_ascii", func(t *testing.T) { + // 85 chars with a non-ASCII character (é = 0xC3 0xA9 in UTF-8) + alphabet := "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}é" + assert.Panics(t, func() { NewEncoding(alphabet) }) + }) + + t.Run("contains_high_byte", func(t *testing.T) { + // 85 bytes with a high byte (0x80+) that's not valid ASCII + alphabet := "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}\x80" + assert.Panics(t, func() { NewEncoding(alphabet) }) + }) +} + +func TestWithPadding(t *testing.T) { + t.Parallel() + + t.Run("valid_padding", func(t *testing.T) { + // use '.' since '=' is in the RFC1924 alphabet + enc := RFC1924.WithPadding('.') + require.NotNil(t, enc) + assert.Equal(t, '.', enc.padChar) + }) + + t.Run("no_padding", func(t *testing.T) { + enc := RFC1924.WithPadding(NoPadding) + require.NotNil(t, enc) + assert.Equal(t, NoPadding, enc.padChar) + }) + + t.Run("newline_padding", func(t *testing.T) { + assert.Panics(t, func() { RFC1924.WithPadding('\n') }) + }) + + t.Run("cr_padding", func(t *testing.T) { + assert.Panics(t, func() { RFC1924.WithPadding('\r') }) + }) + + t.Run("padding_in_alphabet", func(t *testing.T) { + assert.Panics(t, func() { + RFC1924.WithPadding('0') // '0' is in RFC1924 alphabet + }) + }) + + t.Run("non_ascii_padding", func(t *testing.T) { + assert.Panics(t, func() { + RFC1924.WithPadding('é') // non-ASCII character + }) + }) +} + +func TestEncodedLen(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + enc *Encoding + inputLen int + expected int + }{ + // no padding: 4 bytes -> 5 chars, partial: 1->2, 2->3, 3->4 + {"no_pad_zero", RFC1924, 0, 0}, + {"no_pad_one", RFC1924, 1, 2}, + {"no_pad_two", RFC1924, 2, 3}, + {"no_pad_three", RFC1924, 3, 4}, + {"no_pad_four", RFC1924, 4, 5}, + {"no_pad_five", RFC1924, 5, 7}, // 5 + 2 + {"no_pad_eight", RFC1924, 8, 10}, // 10 + + // with padding: always multiple of 5 + {"pad_zero", RFC1924.WithPadding('.'), 0, 0}, + {"pad_one", RFC1924.WithPadding('.'), 1, 5}, + {"pad_two", RFC1924.WithPadding('.'), 2, 5}, + {"pad_three", RFC1924.WithPadding('.'), 3, 5}, + {"pad_four", RFC1924.WithPadding('.'), 4, 5}, + {"pad_five", RFC1924.WithPadding('.'), 5, 10}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expected, tc.enc.EncodedLen(tc.inputLen)) + }) + } +} + +func TestDecodedLen(t *testing.T) { + t.Parallel() + + // DecodedLen returns max possible decoded bytes + // 5 chars -> 4 bytes, partial: 2->1, 3->2, 4->3, 1->0 + tests := []struct { + name string + encodedLen int + expected int + }{ + {"zero", 0, 0}, + {"one", 1, 0}, // 1 char can't decode to anything + {"two", 2, 1}, // 2 chars -> 1 byte + {"three", 3, 2}, // 3 chars -> 2 bytes + {"four", 4, 3}, // 4 chars -> 3 bytes + {"five", 5, 4}, // 5 chars -> 4 bytes + {"six", 6, 4}, // 5 + 1 -> 4 + 0 + {"seven", 7, 5}, // 5 + 2 -> 4 + 1 + {"ten", 10, 8}, // 2 full blocks + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expected, RFC1924.DecodedLen(tc.encodedLen)) + }) + } +} + +func TestCorruptInputError(t *testing.T) { + t.Parallel() + + err := CorruptInputError(42) + errStr := err.Error() + + assert.Contains(t, errStr, "42") +} + +func TestStreamEncoder(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + chunks [][]byte + }{ + {"single_chunk", [][]byte{[]byte("Hello, World!")}}, + {"multiple_chunks", [][]byte{[]byte("Hello"), []byte(", "), []byte("World!")}}, + {"byte_by_byte", [][]byte{{0x01}, {0x02}, {0x03}, {0x04}, {0x05}}}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // combine chunks for expected result + var combined []byte + for _, chunk := range tc.chunks { + combined = append(combined, chunk...) + } + + // encode via stream + var buf bytes.Buffer + encoder := NewEncoder(RFC1924, &buf) + for _, chunk := range tc.chunks { + n, err := encoder.Write(chunk) + require.NoError(t, err) + assert.Len(t, chunk, n) + } + err := encoder.Close() + require.NoError(t, err) + + // decode and verify + decoded, err := RFC1924.DecodeString(buf.String()) + require.NoError(t, err) + assert.Equal(t, combined, decoded) + }) + } +} + +func TestStreamDecoder(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input []byte + }{ + {"empty", []byte{}}, + {"simple", []byte("Hello, World!")}, + {"binary", []byte{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}}, + {"longer_data", bytes.Repeat([]byte("test"), 100)}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // encode first + encoded := RFC1924.EncodeToString(tc.input) + + // decode via stream + decoder := NewDecoder(RFC1924, bytes.NewReader([]byte(encoded))) + decoded, err := io.ReadAll(decoder) + + require.NoError(t, err) + assert.Equal(t, tc.input, decoded) + }) + } +} + +func TestEncodeDirectBuffer(t *testing.T) { + t.Parallel() + + input := []byte("test data") + dst := make([]byte, RFC1924.EncodedLen(len(input))) + + RFC1924.Encode(dst, input) + + decoded, err := RFC1924.DecodeString(string(dst)) + require.NoError(t, err) + assert.Equal(t, input, decoded) +} + +func TestDecodeDirectBuffer(t *testing.T) { + t.Parallel() + + input := []byte("test data") + encoded := RFC1924.EncodeToString(input) + + dst := make([]byte, RFC1924.DecodedLen(len(encoded))) + n, err := RFC1924.Decode(dst, []byte(encoded)) + + require.NoError(t, err) + assert.Len(t, input, n) + assert.Equal(t, input, dst[:n]) +} + +func TestRFC1924AppendEncode(t *testing.T) { + t.Parallel() + + t.Run("append_to_empty", func(t *testing.T) { + input := []byte("test") + result := RFC1924.AppendEncode(nil, input) + + expected := RFC1924.EncodeToString(input) + assert.Equal(t, expected, string(result)) + }) + + t.Run("append_to_existing", func(t *testing.T) { + prefix := []byte("prefix:") + input := []byte("test") + result := RFC1924.AppendEncode(prefix, input) + + expected := "prefix:" + RFC1924.EncodeToString(input) + assert.Equal(t, expected, string(result)) + }) + + t.Run("append_empty_input", func(t *testing.T) { + prefix := []byte("prefix:") + result := RFC1924.AppendEncode(prefix, nil) + + assert.Equal(t, "prefix:", string(result)) + }) + + t.Run("preserves_original_capacity", func(t *testing.T) { + prefix := make([]byte, 0, 100) + prefix = append(prefix, "start:"...) + input := []byte("data") + + result := RFC1924.AppendEncode(prefix, input) + + // verify original slice not modified + assert.Len(t, prefix, 6) + assert.Greater(t, len(result), len(prefix)) + }) +} + +func TestRFC1924AppendDecode(t *testing.T) { + t.Parallel() + + t.Run("append_to_empty", func(t *testing.T) { + input := []byte("test") + encoded := RFC1924.EncodeToString(input) + + result, err := RFC1924.AppendDecode(nil, []byte(encoded)) + + require.NoError(t, err) + assert.Equal(t, input, result) + }) + + t.Run("append_to_existing", func(t *testing.T) { + prefix := []byte("prefix:") + input := []byte("test") + encoded := RFC1924.EncodeToString(input) + + result, err := RFC1924.AppendDecode(prefix, []byte(encoded)) + + require.NoError(t, err) + expected := append([]byte("prefix:"), input...) + assert.Equal(t, expected, result) + }) + + t.Run("append_empty_input", func(t *testing.T) { + prefix := []byte("prefix:") + result, err := RFC1924.AppendDecode(prefix, nil) + + require.NoError(t, err) + assert.Equal(t, "prefix:", string(result)) + }) + + t.Run("corrupt_input_returns_partial", func(t *testing.T) { + prefix := []byte("prefix:") + // valid block + invalid char + input := []byte("test") + encoded := RFC1924.EncodeToString(input) + "[[[" + + result, err := RFC1924.AppendDecode(prefix, []byte(encoded)) + + require.Error(t, err) + // should return prefix + successfully decoded data + assert.GreaterOrEqual(t, len(result), len(prefix)) + }) +} + +type errorWriter struct { + n int + err error +} + +func (w *errorWriter) Write(p []byte) (int, error) { + return w.n, w.err +} + +func TestStreamEncoderWriteError(t *testing.T) { + t.Parallel() + + t.Run("error_on_full_block", func(t *testing.T) { + w := &errorWriter{err: io.ErrShortWrite} + encoder := NewEncoder(RFC1924, w) + + _, err := encoder.Write([]byte("test")) + require.Error(t, err) + + // subsequent writes should fail + _, err = encoder.Write([]byte("more")) + require.Error(t, err) + }) + + t.Run("error_on_buffered_flush", func(t *testing.T) { + w := &errorWriter{err: io.ErrShortWrite} + encoder := NewEncoder(RFC1924, w) + + // write less than 4 bytes (buffers) + _, err := encoder.Write([]byte("ab")) + require.NoError(t, err) + + // write more to trigger flush + _, err = encoder.Write([]byte("cdef")) + require.Error(t, err) + }) + + t.Run("error_on_close", func(t *testing.T) { + w := &errorWriter{err: io.ErrShortWrite} + encoder := NewEncoder(RFC1924, w) + + // write less than 4 bytes + _, err := encoder.Write([]byte("ab")) + require.NoError(t, err) + + // close should fail when flushing remaining + err = encoder.Close() + require.Error(t, err) + + // close again should return same error + err = encoder.Close() + require.Error(t, err) + }) + + t.Run("close_without_write", func(t *testing.T) { + var buf bytes.Buffer + encoder := NewEncoder(RFC1924, &buf) + + err := encoder.Close() + require.NoError(t, err) + assert.Empty(t, buf.String()) + }) +} + +type errorReader struct { + data []byte + err error +} + +func (r *errorReader) Read(p []byte) (int, error) { + if len(r.data) == 0 { + return 0, r.err + } + n := copy(p, r.data) + r.data = r.data[n:] + if len(r.data) == 0 && r.err != nil { + return n, r.err + } + return n, nil +} + +// zeroReader returns (0, nil) a specified number of times before returning actual data. +type zeroReader struct { + data []byte + zeroReads int + chunkSize int + zerosDone int + betweenAll bool // if true, return zero read between every chunk +} + +func (r *zeroReader) Read(p []byte) (int, error) { + if len(r.data) == 0 { + return 0, io.EOF + } + // return (0, nil) the specified number of times at the start + if r.zerosDone < r.zeroReads { + r.zerosDone++ + return 0, nil + } + // optionally reset for next chunk + if r.betweenAll { + r.zerosDone = 0 + } + n := r.chunkSize + if n > len(r.data) { + n = len(r.data) + } + if n > len(p) { + n = len(p) + } + copy(p, r.data[:n]) + r.data = r.data[n:] + return n, nil +} + +// chunkReader splits data into chunks of specified size to test buffering behavior. +type chunkReader struct { + data []byte + chunkSize int +} + +func (r *chunkReader) Read(p []byte) (int, error) { + if len(r.data) == 0 { + return 0, io.EOF + } + n := r.chunkSize + if n > len(r.data) { + n = len(r.data) + } + if n > len(p) { + n = len(p) + } + copy(p, r.data[:n]) + r.data = r.data[n:] + return n, nil +} + +func TestStreamDecoderPartialBlockBuffering(t *testing.T) { + t.Parallel() + + // This test verifies that the decoder correctly handles reads that split + // in the middle of a 5-character encoded block. + input := []byte("Hello, World! This is a longer test string.") + encoded := RFC1924.EncodeToString(input) + + tests := []struct { + name string + chunkSize int + }{ + {"chunk_size_1", 1}, + {"chunk_size_3", 3}, + {"chunk_size_7", 7}, // splits mid-block (7 = 5 + 2) + {"chunk_size_11", 11}, // splits mid-block (11 = 10 + 1) + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + reader := &chunkReader{data: []byte(encoded), chunkSize: tc.chunkSize} + decoder := NewDecoder(RFC1924, reader) + decoded, err := io.ReadAll(decoder) + + require.NoError(t, err) + assert.Equal(t, input, decoded) + }) + } +} + +// dataWithErrorReader returns all data with an error in a single Read call. +type dataWithErrorReader struct { + data []byte + err error + read bool +} + +func (r *dataWithErrorReader) Read(p []byte) (int, error) { + if r.read { + return 0, r.err + } + r.read = true + n := copy(p, r.data) + return n, r.err +} + +// whitespaceOnlyReader returns only whitespace, then an error. +type whitespaceOnlyReader struct { + returned bool + err error +} + +func (r *whitespaceOnlyReader) Read(p []byte) (int, error) { + if r.returned { + return 0, r.err + } + r.returned = true + // return whitespace that will be filtered out + data := []byte(" \t\n\r ") + n := copy(p, data) + return n, nil +} + +// whitespaceWithErrorReader returns whitespace with a non-EOF error on first read. +type whitespaceWithErrorReader struct { + called bool + err error +} + +func (r *whitespaceWithErrorReader) Read(p []byte) (int, error) { + if r.called { + return 0, r.err + } + r.called = true + // return whitespace that filters to empty, with error + data := []byte(" \t\n\r ") + n := copy(p, data) + return n, r.err +} + +func TestStreamDecoderReadError(t *testing.T) { + t.Parallel() + + t.Run("read_error", func(t *testing.T) { + r := &errorReader{err: io.ErrUnexpectedEOF} + decoder := NewDecoder(RFC1924, r) + + buf := make([]byte, 100) + _, err := decoder.Read(buf) + require.Error(t, err) + + // subsequent reads should return same error + _, err = decoder.Read(buf) + require.Error(t, err) + }) + + t.Run("error_after_whitespace_only", func(t *testing.T) { + // reader returns only whitespace (filtered to empty) then error + r := &whitespaceOnlyReader{err: io.ErrUnexpectedEOF} + decoder := NewDecoder(RFC1924, r) + + buf := make([]byte, 100) + _, err := decoder.Read(buf) + require.ErrorIs(t, err, io.ErrUnexpectedEOF) + }) + + t.Run("whitespace_with_error_not_eof", func(t *testing.T) { + // reader returns whitespace with error in same read (filters to empty, not EOF) + r := &whitespaceWithErrorReader{err: io.ErrUnexpectedEOF} + decoder := NewDecoder(RFC1924, r) + + buf := make([]byte, 100) + _, err := decoder.Read(buf) + require.ErrorIs(t, err, io.ErrUnexpectedEOF) + }) + + t.Run("decode_error_with_partial_data_small_buffer", func(t *testing.T) { + // This tests the error path where some data is decoded before + // the error, and the output buffer is too small. Uses padding mode + // to trigger an error mid-stream after valid blocks are decoded. + paddedEnc := RFC1924.WithPadding('.') + + // Create input: valid padded block + block with non-contiguous padding + // Valid: 4 bytes = 5 chars (full block) + valid := paddedEnc.EncodeToString([]byte{0x01, 0x02, 0x03, 0x04}) + // Invalid: "AB.C." has non-contiguous padding (all chars valid, but padding broken) + // The 'C' is a valid alphabet char, so it passes stream filter but fails + // decodeFiltered's padding validation at line 297-299 + invalid := "AB.C." + + allData := valid + invalid + + // Use a reader that returns all data at once with EOF + decoder := NewDecoder(paddedEnc, bytes.NewReader([]byte(allData))) + + // Use small buffer to trigger buffering on decode error + buf := make([]byte, 2) + + n, err := decoder.Read(buf) + // Should decode 4 bytes from valid block before hitting error + // But buffer only holds 2, so should buffer the rest and defer error + require.Equal(t, 2, n) + require.NoError(t, err, "error should be deferred") + + // Continue reading to get buffered data + var result []byte + result = append(result, buf[:n]...) + for { + n, err = decoder.Read(buf) + if n > 0 { + result = append(result, buf[:n]...) + } + if err != nil { + break + } + } + + // Should have gotten the 4 valid bytes + assert.Equal(t, []byte{0x01, 0x02, 0x03, 0x04}, result) + // And eventually hit the error + assert.Error(t, err) + }) + + t.Run("data_with_error", func(t *testing.T) { + // encode some data + input := []byte("Hello, World!") + encoded := RFC1924.EncodeToString(input) + + // reader returns all data with an error in single read + r := &dataWithErrorReader{data: []byte(encoded), err: io.ErrUnexpectedEOF} + decoder := NewDecoder(RFC1924, r) + + // should get data first despite error + decoded, err := io.ReadAll(decoder) + + // data should be decoded successfully + assert.Equal(t, input, decoded) + // error should be returned after data exhausted + assert.ErrorIs(t, err, io.ErrUnexpectedEOF) + }) + + t.Run("small_output_buffer", func(t *testing.T) { + encoded := RFC1924.EncodeToString([]byte("Hello, World!")) + decoder := NewDecoder(RFC1924, bytes.NewReader([]byte(encoded))) + + // read with very small buffer to trigger buffering + buf := make([]byte, 2) + var result []byte + for { + n, err := decoder.Read(buf) + result = append(result, buf[:n]...) + if err == io.EOF { + break + } + require.NoError(t, err) + } + + assert.Equal(t, []byte("Hello, World!"), result) + }) + + t.Run("corrupt_input", func(t *testing.T) { + // use invalid characters for RFC1924 + decoder := NewDecoder(RFC1924, bytes.NewReader([]byte("[[["))) + + buf := make([]byte, 100) + _, err := decoder.Read(buf) + assert.Error(t, err) + }) + + t.Run("buffered_data_with_error", func(t *testing.T) { + // use input that encodes to complete 5-char blocks (8 bytes = 10 chars = 2 blocks) + input := []byte("12345678") + encoded := RFC1924.EncodeToString(input) + require.Len(t, encoded, 10) // verify our assumption + + // add a single trailing valid character - this creates an invalid 1-char partial block + // (minimum 2 chars needed to decode 1 byte) + encodedWithError := encoded + "0" + + // use a small read buffer to force outBuf buffering + decoder := NewDecoder(RFC1924, bytes.NewReader([]byte(encodedWithError))) + buf := make([]byte, 2) + + var result []byte + var lastErr error + for { + n, err := decoder.Read(buf) + if n > 0 { + result = append(result, buf[:n]...) + } + if err != nil { + lastErr = err + break + } + } + + // should have decoded the valid portion (8 bytes from 10 chars) + assert.Equal(t, input, result) + // should have received a corrupt input error for the trailing char + var corruptErr CorruptInputError + require.ErrorAs(t, lastErr, &corruptErr) + }) + + t.Run("error_with_small_buffer_returns_bytes", func(t *testing.T) { + // This test verifies that when a decode error occurs and the output + // buffer is smaller than decoded data, Read correctly returns the + // number of bytes written (not 0) and defers the error. + input := []byte("12345678") // 8 bytes -> 10 encoded chars + encoded := RFC1924.EncodeToString(input) + + // append invalid trailing char to cause error after decoding valid data + encodedWithError := encoded + "0" + + decoder := NewDecoder(RFC1924, bytes.NewReader([]byte(encodedWithError))) + + // use buffer smaller than decoded output (8 bytes) to trigger buffering + buf := make([]byte, 3) + + // first read should return bytes, not error (error deferred) + n, err := decoder.Read(buf) + require.Equal(t, 3, n, "should return actual bytes written, not 0") + require.NoError(t, err, "error should be deferred until buffer drained") + require.Equal(t, input[:3], buf[:n]) + + // continue reading to drain buffer + var result []byte + result = append(result, buf[:n]...) + for { + n, err = decoder.Read(buf) + if n > 0 { + result = append(result, buf[:n]...) + } + if err != nil { + break + } + } + + // all valid data should be returned before error + assert.Equal(t, input, result) + var corruptErr CorruptInputError + assert.ErrorAs(t, err, &corruptErr) + }) +} + +func TestStreamDecoderZeroReads(t *testing.T) { + t.Parallel() + + input := []byte("Hello, World!") + encoded := RFC1924.EncodeToString(input) + + tests := []struct { + name string + zeroReads int + chunkSize int + betweenAll bool + }{ + {"zeros_at_start", 3, 10, false}, + {"zeros_between_chunks", 1, 5, true}, + {"many_zeros_at_start", 10, 20, false}, + {"single_byte_with_zeros", 2, 1, true}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + reader := &zeroReader{ + data: []byte(encoded), + zeroReads: tc.zeroReads, + chunkSize: tc.chunkSize, + betweenAll: tc.betweenAll, + } + decoder := NewDecoder(RFC1924, reader) + decoded, err := io.ReadAll(decoder) + + require.NoError(t, err) + assert.Equal(t, input, decoded) + }) + } +} + +func TestPaddedDecoding(t *testing.T) { + t.Parallel() + + paddedEnc := RFC1924.WithPadding('.') + + t.Run("concatenated_padded_blocks", func(t *testing.T) { + // encode two 1-byte values separately with padding + encoded1 := paddedEnc.EncodeToString([]byte{0x42}) // 2 chars + 3 padding + encoded2 := paddedEnc.EncodeToString([]byte{0x43}) // 2 chars + 3 padding + + concatenated := encoded1 + encoded2 + + // should decode both blocks + decoded, err := paddedEnc.DecodeString(concatenated) + require.NoError(t, err) + assert.Equal(t, []byte{0x42, 0x43}, decoded) + }) + + t.Run("padded_blocks_with_whitespace", func(t *testing.T) { + encoded1 := paddedEnc.EncodeToString([]byte{0x42}) + encoded2 := paddedEnc.EncodeToString([]byte{0x43}) + + withWhitespace := encoded1 + " \t\n" + encoded2 + + decoded, err := paddedEnc.DecodeString(withWhitespace) + require.NoError(t, err) + assert.Equal(t, []byte{0x42, 0x43}, decoded) + }) + + t.Run("invalid_padding_positions", func(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + }{ + {"padding_at_start", "....."}, + {"padding_in_middle", "AB.CD"}, // padding not at end of block + {"non_contiguous", "AB.C."}, // padding chars separated by non-padding + {"single_padding_remainder", "."}, // 1-char remainder with padding + {"two_padding_remainder", "AB...X"}, // valid block + padding in remainder + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + _, err := paddedEnc.DecodeString(tc.input) + assert.Error(t, err) + }) + } + }) + + t.Run("invalid_char_in_padded_block", func(t *testing.T) { + // '[' is not in RFC1924 alphabet - test invalid char in data portion of block + _, err := paddedEnc.DecodeString("AB[..") + assert.Error(t, err) + }) + + t.Run("padding_in_trailing_bytes", func(t *testing.T) { + // valid 5-char block followed by padding in trailing portion + encoded := paddedEnc.EncodeToString([]byte{0x01, 0x02, 0x03, 0x04}) // 5 chars, no padding + _, err := paddedEnc.DecodeString(encoded + "A.") // trailing 2 chars with padding + assert.Error(t, err) + }) + + t.Run("invalid_char_in_trailing_bytes", func(t *testing.T) { + // valid 5-char block followed by invalid char in trailing portion + encoded := RFC1924.EncodeToString([]byte{0x01, 0x02, 0x03, 0x04}) // 5 chars + _, err := RFC1924.DecodeString(encoded + "A[") // '[' is invalid + assert.Error(t, err) + }) + + t.Run("stream_concatenated_blocks", func(t *testing.T) { + encoded1 := paddedEnc.EncodeToString([]byte{0x42}) + encoded2 := paddedEnc.EncodeToString([]byte{0x43}) + concatenated := encoded1 + encoded2 + + decoder := NewDecoder(paddedEnc, bytes.NewReader([]byte(concatenated))) + decoded, err := io.ReadAll(decoder) + + require.NoError(t, err) + assert.Equal(t, []byte{0x42, 0x43}, decoded) + }) + + t.Run("stream_padding_split_across_reads", func(t *testing.T) { + encoded := paddedEnc.EncodeToString([]byte{0x42, 0x43}) // 2 bytes -> 3 chars + 2 padding + + // split in middle of padding region + reader := &chunkReader{data: []byte(encoded), chunkSize: 4} + decoder := NewDecoder(paddedEnc, reader) + decoded, err := io.ReadAll(decoder) + + require.NoError(t, err) + assert.Equal(t, []byte{0x42, 0x43}, decoded) + }) + + t.Run("stream_with_whitespace_between_padded", func(t *testing.T) { + encoded1 := paddedEnc.EncodeToString([]byte{0x42}) + encoded2 := paddedEnc.EncodeToString([]byte{0x43}) + withWhitespace := encoded1 + " \n " + encoded2 + + decoder := NewDecoder(paddedEnc, bytes.NewReader([]byte(withWhitespace))) + decoded, err := io.ReadAll(decoder) + + require.NoError(t, err) + assert.Equal(t, []byte{0x42, 0x43}, decoded) + }) + + // stream decoder tests for decodeFiltered error paths + t.Run("stream_dataLen_zero", func(t *testing.T) { + // "....." has 0 data chars - triggers dataLen < 2 check + decoder := NewDecoder(paddedEnc, bytes.NewReader([]byte("....."))) + _, err := io.ReadAll(decoder) + assert.Error(t, err) + }) + + t.Run("stream_dataLen_one", func(t *testing.T) { + // "A...." has 1 data char - triggers dataLen < 2 check + decoder := NewDecoder(paddedEnc, bytes.NewReader([]byte("A...."))) + _, err := io.ReadAll(decoder) + assert.Error(t, err) + }) + + t.Run("stream_invalid_char_in_padded_block", func(t *testing.T) { + // "[B..." has invalid char '[' in data portion of padded block + decoder := NewDecoder(paddedEnc, bytes.NewReader([]byte("[B..."))) + _, err := io.ReadAll(decoder) + assert.Error(t, err) + }) + + t.Run("stream_padding_in_remainder", func(t *testing.T) { + // valid 5-char block followed by remainder with padding + valid := paddedEnc.EncodeToString([]byte{0x01, 0x02, 0x03, 0x04}) // 5 chars + withPaddingRemainder := valid + "A." // remainder has padding + decoder := NewDecoder(paddedEnc, bytes.NewReader([]byte(withPaddingRemainder))) + _, err := io.ReadAll(decoder) + assert.Error(t, err) + }) + + t.Run("stream_invalid_char_in_remainder", func(t *testing.T) { + // valid 5-char block followed by remainder with invalid char + valid := RFC1924.EncodeToString([]byte{0x01, 0x02, 0x03, 0x04}) // 5 chars + withInvalidRemainder := valid + "A[" // '[' is invalid + decoder := NewDecoder(RFC1924, bytes.NewReader([]byte(withInvalidRemainder))) + _, err := io.ReadAll(decoder) + assert.Error(t, err) + }) +} + +func TestDecodeWhitespaceAlphabetAware(t *testing.T) { + t.Parallel() + + // create encoding with space in the alphabet (replaces '~' which is the last char in RFC1924) + alphabetWithSpace := "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|} " + encWithSpace := NewEncoding(alphabetWithSpace) + + // create encoding with tab in the alphabet (replaces '~') + alphabetWithTab := "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}\t" + encWithTab := NewEncoding(alphabetWithTab) + + t.Run("space_in_alphabet_not_skipped", func(t *testing.T) { + // space is at alphabet position 84 (value 84) + // create a 5-char encoded block where the last char is space (value 84) + // value % 85 = 84 means value = 84, 84+85=169, 84+85*2=254, etc. + // for simplicity, encode a full block where value 84 appears + // we'll construct a known encoded string with space and decode it + + // "00000" decodes to: 0*85^4 + 0*85^3 + 0*85^2 + 0*85 + 0 = 0 -> [0,0,0,0] + // "0000 " (with space = 84) decodes to: 0 + 0 + 0 + 0 + 84 = 84 -> different bytes + + // first verify that space in encoded data produces different result than without space + decoded1, err := encWithSpace.DecodeString("00000") + require.NoError(t, err) + + decoded2, err := encWithSpace.DecodeString("0000 ") // space = value 84 + require.NoError(t, err) + + assert.NotEqual(t, decoded1, decoded2) // space in alphabet should decode differently than '0' + + // round-trip with data that encodes to include spaces + input := []byte{0x00, 0x00, 0x00, 0x54} // 0x54 = 84, should encode with space + encoded := encWithSpace.EncodeToString(input) + decoded, err := encWithSpace.DecodeString(encoded) + require.NoError(t, err) + assert.Equal(t, input, decoded) + }) + + t.Run("tab_in_alphabet_not_skipped", func(t *testing.T) { + // similar test with tab + decoded1, err := encWithTab.DecodeString("00000") + require.NoError(t, err) + + decoded2, err := encWithTab.DecodeString("0000\t") // tab = value 84 + require.NoError(t, err) + + assert.NotEqual(t, decoded1, decoded2) // tab in alphabet should decode differently than '0' + + // round-trip + input := []byte{0x00, 0x00, 0x00, 0x54} + encoded := encWithTab.EncodeToString(input) + decoded, err := encWithTab.DecodeString(encoded) + require.NoError(t, err) + assert.Equal(t, input, decoded) + }) + + t.Run("space_not_in_alphabet_skipped", func(t *testing.T) { + // RFC1924 does not include space, so spaces should be ignored + input := []byte{0x01, 0x02, 0x03, 0x04} + encoded := RFC1924.EncodeToString(input) + + // insert spaces between each character + var sb strings.Builder + for i, c := range encoded { + if i > 0 { + sb.WriteByte(' ') + } + sb.WriteRune(c) + } + + decoded, err := RFC1924.DecodeString(sb.String()) + require.NoError(t, err) + assert.Equal(t, input, decoded) // space not in alphabet should be skipped + }) + + t.Run("tab_not_in_alphabet_skipped", func(t *testing.T) { + input := []byte{0x01, 0x02, 0x03, 0x04} + encoded := RFC1924.EncodeToString(input) + + // insert tabs + withTabs := encoded[:2] + "\t\t" + encoded[2:] + decoded, err := RFC1924.DecodeString(withTabs) + require.NoError(t, err) + assert.Equal(t, input, decoded) // tab not in alphabet should be skipped + }) + + t.Run("stream_decoder_space_in_alphabet", func(t *testing.T) { + input := []byte("test data for stream") + encoded := encWithSpace.EncodeToString(input) + + decoder := NewDecoder(encWithSpace, bytes.NewReader([]byte(encoded))) + decoded, err := io.ReadAll(decoder) + + require.NoError(t, err) + assert.Equal(t, input, decoded) + }) + + t.Run("stream_decoder_whitespace_skipped", func(t *testing.T) { + input := []byte("test data") + encoded := RFC1924.EncodeToString(input) + + // insert whitespace between blocks (every 5 chars) + var sb strings.Builder + for i, c := range encoded { + sb.WriteRune(c) + if (i+1)%5 == 0 { + sb.WriteString(" \t\n\r") + } + } + + decoder := NewDecoder(RFC1924, bytes.NewReader([]byte(sb.String()))) + decoded, err := io.ReadAll(decoder) + + require.NoError(t, err) + assert.Equal(t, input, decoded) + }) +} diff --git a/base85/fuzz_test.go b/base85/fuzz_test.go new file mode 100644 index 0000000..3bd56a0 --- /dev/null +++ b/base85/fuzz_test.go @@ -0,0 +1,196 @@ +package base85 + +import ( + "bytes" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func FuzzEncode(f *testing.F) { + f.Add([]byte{}) + f.Add([]byte{0}) + f.Add([]byte{255}) + f.Add([]byte("Hello, World!")) + f.Add([]byte("The quick brown fox jumps over the lazy dog")) + + // block boundary cases (base85 works in 4-byte blocks) + f.Add([]byte{1, 2, 3, 4}) // exact block + f.Add([]byte{1, 2, 3, 4, 5}) // one block + 1 + f.Add([]byte{1, 2, 3, 4, 5, 6}) // one block + 2 + f.Add([]byte{1, 2, 3, 4, 5, 6, 7}) // one block + 3 + + // partial blocks + f.Add([]byte{1}) + f.Add([]byte{1, 2}) + f.Add([]byte{1, 2, 3}) + + // all zeros and all ones + f.Add([]byte{0, 0, 0, 0}) + f.Add([]byte{255, 255, 255, 255}) + + // binary data patterns + f.Add([]byte{0x00, 0x7F, 0x80, 0xFF}) + f.Add([]byte{0xDE, 0xAD, 0xBE, 0xEF}) + + f.Fuzz(func(t *testing.T, data []byte) { + encoded := RFC1924.EncodeToString(data) // should never panic + + // round-trip should recover original data + decoded, err := RFC1924.DecodeString(encoded) + require.NoError(t, err) + assert.Equal(t, data, decoded) + }) +} + +func FuzzDecode(f *testing.F) { + f.Add([]byte{}) + + // valid RFC1924 encoded strings + f.Add([]byte("0")) // partial block (invalid - too short) + f.Add([]byte("00")) // minimal valid partial block + f.Add([]byte("0000000000")) // two full blocks worth + + // encoded "Hello" + f.Add([]byte("BOu!rDZ")) + + // whitespace handling + f.Add([]byte("BOu!r DZ")) + f.Add([]byte("BOu!r\nDZ")) + f.Add([]byte("BOu!r\tDZ")) + f.Add([]byte(" BOu!rDZ ")) + + // invalid characters (should return error, not panic) + f.Add([]byte("invalid\"chars")) + f.Add([]byte{0x00, 0x01, 0x02}) + f.Add([]byte{0xFF, 0xFE, 0xFD}) + + // boundary alphabet characters + f.Add([]byte("0")) // first alphabet char + f.Add([]byte("~")) // last alphabet char + f.Add([]byte("09AZaz!#$%&()")) // mixed alphabet chars + + f.Fuzz(func(t *testing.T, data []byte) { + _, _ = RFC1924.DecodeString(string(data)) // should never panic (errors are acceptable) + }) +} + +func FuzzEncodeWithPadding(f *testing.F) { + f.Add([]byte{}) + f.Add([]byte("test")) + f.Add([]byte("hello world")) + + // partial blocks that need padding + f.Add([]byte{1}) + f.Add([]byte{1, 2}) + f.Add([]byte{1, 2, 3}) + + // exact blocks (no padding needed) + f.Add([]byte{1, 2, 3, 4}) + f.Add([]byte{1, 2, 3, 4, 5, 6, 7, 8}) + + f.Fuzz(func(t *testing.T, data []byte) { + // use '.' as padding since '=' is in RFC1924 alphabet + enc := RFC1924.WithPadding('.') + + // encoding should never panic + encoded := enc.EncodeToString(data) + + // with padding, length should always be multiple of 5 + if len(data) > 0 && len(encoded)%5 != 0 { + t.Errorf("padded encoding length %d not multiple of 5", len(encoded)) + } + + // round-trip should recover original data + decoded, err := enc.DecodeString(encoded) + require.NoError(t, err) + assert.Equal(t, data, decoded) + }) +} + +func FuzzStreamRoundTrip(f *testing.F) { + f.Add([]byte{}) + f.Add([]byte{1}) + f.Add([]byte{1, 2, 3, 4}) + f.Add([]byte{1, 2, 3, 4, 5}) + f.Add(make([]byte, 100)) + f.Add([]byte("The quick brown fox jumps over the lazy dog")) + + f.Fuzz(func(t *testing.T, data []byte) { + // encode using stream encoder + var encoded bytes.Buffer + encoder := NewEncoder(RFC1924, &encoded) + _, err := encoder.Write(data) + require.NoError(t, err) + require.NoError(t, encoder.Close()) + + // decode using stream decoder + decoder := NewDecoder(RFC1924, &encoded) + var decoded bytes.Buffer + _, err = decoded.ReadFrom(decoder) + require.NoError(t, err) + assert.Equal(t, data, decoded.Bytes()) + }) +} + +func FuzzDecodeWithPadding(f *testing.F) { + f.Add([]byte{}) + + // use '.' as padding since '=' is in RFC1924 alphabet + enc := RFC1924.WithPadding('.') + + // valid padded encodings + f.Add([]byte("00...")) // 1 byte padded + f.Add([]byte("000..")) // 2 bytes padded + f.Add([]byte("0000.")) // 3 bytes padded + f.Add([]byte("00000")) // full block, no padding + f.Add([]byte("0000000000")) // two full blocks + f.Add([]byte("00000000..")) // one full + 2 bytes padded + f.Add([]byte("BOu!rDZ..")) // encoded "Hello" with padding + + // invalid padding patterns (should error, not panic) + f.Add([]byte(".....")) // all padding + f.Add([]byte("0....")) // only 1 data char + f.Add([]byte("00.0.")) // padding in middle + f.Add([]byte("...00")) // padding at start + + // mixed valid/invalid + f.Add([]byte("00000.....")) // full block + all padding block + f.Add([]byte{0x00, 0x01, '.', '.', '.'}) + + f.Fuzz(func(t *testing.T, data []byte) { + _, _ = enc.DecodeString(string(data)) // should never panic (errors are acceptable) + }) +} + +func FuzzStreamRoundTripWithPadding(f *testing.F) { + // various sizes to test buffering with padding + f.Add([]byte{}) + f.Add([]byte{1}) + f.Add([]byte{1, 2}) + f.Add([]byte{1, 2, 3}) + f.Add([]byte{1, 2, 3, 4}) + f.Add([]byte{1, 2, 3, 4, 5}) + f.Add(make([]byte, 100)) + f.Add([]byte("The quick brown fox jumps over the lazy dog")) + + f.Fuzz(func(t *testing.T, data []byte) { + // use '.' as padding since '=' is in RFC1924 alphabet + enc := RFC1924.WithPadding('.') + + // encode using stream encoder + var encoded bytes.Buffer + encoder := NewEncoder(enc, &encoded) + _, err := encoder.Write(data) + require.NoError(t, err) + require.NoError(t, encoder.Close()) + + // decode using stream decoder + decoder := NewDecoder(enc, &encoded) + var decoded bytes.Buffer + _, err = decoded.ReadFrom(decoder) + require.NoError(t, err) + assert.Equal(t, data, decoded.Bytes()) + }) +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..cb4d688 --- /dev/null +++ b/go.mod @@ -0,0 +1,11 @@ +module github.com/go-analyze/encoding + +go 1.18 + +require github.com/stretchr/testify v1.11.1 + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..c4c1710 --- /dev/null +++ b/go.sum @@ -0,0 +1,10 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=