diff --git a/README.md b/README.md index 087d3a4..d151a1e 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ package main import ( "encoding/hex" "fmt" + "github.com/capitalone/fpe" "github.com/capitalone/fpe/ff1" ) @@ -52,9 +53,12 @@ func main() { panic(err) } + // The alphabet can contain up to 65536 unique characters constructed from a UTF-8 string. + alphabet := "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+-_=[]{}!$%^&*() \u2318" + // Create a new FF1 cipher "object" - // 10 is the radix/base, and 8 is the tweak length. - FF1, err := ff1.NewCipher(10, 8, key, tweak) + // Alphabet defines the supported character set, and 8 is the tweak length. + FF1, err := ff1.NewCipherWithAlphabet(alphabet, 8, key, tweak) if err != nil { panic(err) } diff --git a/ff1/ff1.go b/ff1/ff1.go index c34c9c8..477610c 100644 --- a/ff1/ff1.go +++ b/ff1/ff1.go @@ -26,9 +26,10 @@ import ( "crypto/cipher" "encoding/binary" "errors" + "fmt" + "github.com/capitalone/fpe/fpeUtils" "math" "math/big" - "strings" ) // Note that this is strictly following the official NIST spec guidelines. In the linked PDF Appendix A (README.md), NIST recommends that radix^minLength >= 1,000,000. If you would like to follow that, change this parameter. @@ -61,6 +62,7 @@ type cbcMode interface { // using a particular key, radix, and tweak type Cipher struct { tweak []byte + codec fpeUtils.Codec radix int minLen uint32 maxLen uint32 @@ -70,9 +72,19 @@ type Cipher struct { cbcEncryptor cipher.BlockMode } -// NewCipher initializes a new FF1 Cipher for encryption or decryption use -// based on the radix, max tweak length, key and tweak parameters. +const ( + // from func (*big.Int)SetString + legacyAlphabet = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +) + +// NewCipher is provided for backwards compatibility for old client code. 
func NewCipher(radix int, maxTLen int, key []byte, tweak []byte) (Cipher, error) { + return NewCipherWithAlphabet(legacyAlphabet[:radix], maxTLen, key, tweak) +} + +// NewCipherWithAlphabet initializes a new FF1 Cipher for encryption or decryption use +// based on the alphabet, max tweak length, key and tweak parameters. +func NewCipherWithAlphabet(alphabet string, maxTLen int, key []byte, tweak []byte) (Cipher, error) { var newCipher Cipher keyLen := len(key) @@ -82,10 +94,16 @@ func NewCipher(radix int, maxTLen int, key []byte, tweak []byte) (Cipher, error) return newCipher, errors.New("key length must be 128, 192, or 256 bits") } - // While FF1 allows radices in [2, 2^16], - // realistically there's a practical limit based on the alphabet that can be passed in - if (radix < 2) || (radix > big.MaxBase) { - return newCipher, errors.New("radix must be between 2 and 36, inclusive") + codec, err := fpeUtils.NewCodec(alphabet) + if err != nil { + return newCipher, fmt.Errorf("error making codec: %s", err) + } + + radix := codec.Radix() + + // FF1 allows radices in [2, 2^16]. + if (radix < 2) || (radix > 65536) { + return newCipher, fmt.Errorf("radix must be between 2 and 65536: %d supplied", radix) } // Make sure the length of given tweak is in range @@ -98,8 +116,8 @@ func NewCipher(radix int, maxTLen int, key []byte, tweak []byte) (Cipher, error) var maxLen uint32 = math.MaxUint32 - // Make sure 2 <= minLength <= maxLength < 2^32 is satisfied - if (minLen < 2) || (maxLen < minLen) || (maxLen > math.MaxUint32) { + // Make sure minLength <= maxLength < 2^32 is satisfied + if (maxLen < minLen) || (maxLen > math.MaxUint32) { return newCipher, errors.New("minLen invalid, adjust your radix") } @@ -112,7 +130,7 @@ func NewCipher(radix int, maxTLen int, key []byte, tweak []byte) (Cipher, error) cbcEncryptor := cipher.NewCBCEncrypter(aesBlock, ivZero) newCipher.tweak = tweak - newCipher.radix = radix + newCipher.codec = codec newCipher.minLen = minLen newCipher.maxLen = maxLen 
newCipher.maxTLen = maxTLen @@ -135,9 +153,16 @@ func (c Cipher) Encrypt(X string) (string, error) { func (c Cipher) EncryptWithTweak(X string, tweak []byte) (string, error) { var ret string var err error - var ok bool - n := uint32(len(X)) + // String X contains a sequence of characters, where some characters + // might take up multiple bytes. Convert into an array of indices into + // the alphabet embedded in the codec. + Xn, err := c.codec.Encode(X) + if err != nil { + return ret, ErrStringNotInRadix + } + + n := uint32(len(Xn)) t := len(tweak) // Check if message length is within minLength and maxLength bounds @@ -150,22 +175,15 @@ func (c Cipher) EncryptWithTweak(X string, tweak []byte) (string, error) { return ret, ErrTweakLengthInvalid } - radix := c.radix - - // Check if the message is in the current radix - var numX big.Int - _, ok = numX.SetString(X, radix) - if !ok { - return ret, ErrStringNotInRadix - } + radix := c.codec.Radix() // Calculate split point u := n / 2 v := n - u // Split the message - A := X[:u] - B := X[u:] + A := Xn[:u] + B := Xn[u:] // Byte lengths b := int(math.Ceil(math.Ceil(float64(v)*math.Log2(float64(radix))) / 8)) @@ -197,7 +215,7 @@ func (c Cipher) EncryptWithTweak(X string, tweak []byte) (string, error) { binary.BigEndian.PutUint32(P[8:12], n) binary.BigEndian.PutUint32(P[12:lenP], uint32(t)) - // Determinte lengths of byte slices + // Determine lengths of byte slices // Q's length is known to always be t+b+1+numPad, to be multiple of 16 lenQ := t + b + 1 + numPad @@ -262,13 +280,13 @@ func (c Cipher) EncryptWithTweak(X string, tweak []byte) (string, error) { numModV.Exp(&numRadix, &numV, nil) // Bootstrap for 1st round - _, ok = numA.SetString(A, radix) - if !ok { + numA, err = fpeUtils.Num(A, uint64(radix)) + if err != nil { return ret, ErrStringNotInRadix } - _, ok = numB.SetString(B, radix) - if !ok { + numB, err = fpeUtils.Num(B, uint64(radix)) + if err != nil { return ret, ErrStringNotInRadix } @@ -343,16 +361,7 @@ func (c 
Cipher) EncryptWithTweak(X string, tweak []byte) (string, error) { numB = numC } - A = numA.Text(radix) - B = numB.Text(radix) - - // Pad both A and B properly - A = strings.Repeat("0", int(u)-len(A)) + A - B = strings.Repeat("0", int(v)-len(B)) + B - - ret = A + B - - return ret, nil + return fpeUtils.DecodeNum(&numA, len(A), &numB, len(B), c.codec) } // Decrypt decrypts the string X over the current FF1 parameters @@ -369,9 +378,16 @@ func (c Cipher) Decrypt(X string) (string, error) { func (c Cipher) DecryptWithTweak(X string, tweak []byte) (string, error) { var ret string var err error - var ok bool - n := uint32(len(X)) + // String X contains a sequence of characters, where some characters + // might take up multiple bytes. Convert into an array of indices into + // the alphabet embedded in the codec. + Xn, err := c.codec.Encode(X) + if err != nil { + return ret, ErrStringNotInRadix + } + + n := uint32(len(Xn)) t := len(tweak) // Check if message length is within minLength and maxLength bounds @@ -384,22 +400,15 @@ func (c Cipher) DecryptWithTweak(X string, tweak []byte) (string, error) { return ret, ErrTweakLengthInvalid } - radix := c.radix - - // Check if the message is in the current radix - var numX big.Int - _, ok = numX.SetString(X, radix) - if !ok { - return ret, ErrStringNotInRadix - } + radix := c.codec.Radix() // Calculate split point u := n / 2 v := n - u // Split the message - A := X[:u] - B := X[u:] + A := Xn[:u] + B := Xn[u:] // Byte lengths b := int(math.Ceil(math.Ceil(float64(v)*math.Log2(float64(radix))) / 8)) @@ -431,7 +440,7 @@ func (c Cipher) DecryptWithTweak(X string, tweak []byte) (string, error) { binary.BigEndian.PutUint32(P[8:12], n) binary.BigEndian.PutUint32(P[12:lenP], uint32(t)) - // Determinte lengths of byte slices + // Determine lengths of byte slices // Q's length is known to always be t+b+1+numPad, to be multiple of 16 lenQ := t + b + 1 + numPad @@ -496,13 +505,13 @@ func (c Cipher) DecryptWithTweak(X string, tweak []byte) 
(string, error) { numModV.Exp(&numRadix, &numV, nil) // Bootstrap for 1st round - _, ok = numA.SetString(A, radix) - if !ok { + numA, err = fpeUtils.Num(A, uint64(radix)) + if err != nil { return ret, ErrStringNotInRadix } - _, ok = numB.SetString(B, radix) - if !ok { + numB, err = fpeUtils.Num(B, uint64(radix)) + if err != nil { return ret, ErrStringNotInRadix } @@ -577,16 +586,7 @@ func (c Cipher) DecryptWithTweak(X string, tweak []byte) (string, error) { numA = numC } - A = numA.Text(radix) - B = numB.Text(radix) - - // Pad both A and B properly - A = strings.Repeat("0", int(u)-len(A)) + A - B = strings.Repeat("0", int(v)-len(B)) + B - - ret = A + B - - return ret, nil + return fpeUtils.DecodeNum(&numA, len(A), &numB, len(B), c.codec) } // ciph defines how the main block cipher is called. diff --git a/ff1/ff1_test.go b/ff1/ff1_test.go index 90ed8bb..927be7f 100644 --- a/ff1/ff1_test.go +++ b/ff1/ff1_test.go @@ -20,9 +20,12 @@ See the License for the specific language governing permissions and limitations package ff1 import ( + "bytes" "encoding/hex" "fmt" + "strings" "testing" + "unicode/utf8" ) // Test vectors taken from here: http://csrc.nist.gov/groups/ST/toolkit/documents/Examples/FF1samples.pdf @@ -137,7 +140,7 @@ func TestEncrypt(t *testing.T) { } if ciphertext != testVector.ciphertext { - t.Fatalf("\nSample%d\nRadix:\t\t%d\nKey:\t\t%s\nTweak:\t\t%s\nPlaintext:\t%s\nCiphertext:\t%s\nExpected:\t%s", sampleNumber, testVector.radix, testVector.key, testVector.tweak, testVector.plaintext, ciphertext, testVector.ciphertext) + t.Fatalf("\nSample%d\nradix:\t\t%d\nKey:\t\t%s\nTweak:\t\t%s\nPlaintext:\t%s\nCiphertext:\t%s\nExpected:\t%s", sampleNumber, testVector.radix, testVector.key, testVector.tweak, testVector.plaintext, ciphertext, testVector.ciphertext) } }) } @@ -169,13 +172,13 @@ func TestDecrypt(t *testing.T) { } if plaintext != testVector.plaintext { - 
t.Fatalf("\nSample%d\nRadix:\t\t%d\nKey:\t\t%s\nTweak:\t\t%s\nCiphertext:\t%s\nPlaintext:\t%s\nExpected:\t%s", sampleNumber, testVector.radix, testVector.key, testVector.tweak, testVector.ciphertext, plaintext, testVector.plaintext) + t.Fatalf("\nSample%d\nradix:\t\t%d\nKey:\t\t%s\nTweak:\t\t%s\nCiphertext:\t%s\nPlaintext:\t%s\nExpected:\t%s", sampleNumber, testVector.radix, testVector.key, testVector.tweak, testVector.ciphertext, plaintext, testVector.plaintext) } }) } } -// These are for testing long inputs, which are not in the sandard test vectors +// These are for testing long inputs, which are not in the standard test vectors func TestLong(t *testing.T) { key, err := hex.DecodeString("2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94") @@ -232,6 +235,97 @@ func TestIssue14(t *testing.T) { } } +// Alphabet can contain unicode characters +func TestUnicode(t *testing.T) { + key, err := hex.DecodeString("EF4359D8D580AA4F7F036D6F04FC6A94") + + tweak, err := hex.DecodeString("D8E7920AFA330A73") + + // 0-9 plus 1-byte, 2-byte, 3-byte and 4-byte UTF-8 chars (the 4-byte one needs the 8-digit \U escape) + ff1, err := NewCipherWithAlphabet("0123456789\u0024\u00A2\u0939\U00010348", 8, key, tweak) + if err != nil { + t.Fatalf("Unable to create cipher: %v", err) + } + + plaintext := "0123456789\u0024\u00A2\u0939\U00010348" + + ciphertext, err := ff1.Encrypt(plaintext) + if err != nil { + t.Fatalf("%v", err) + } + + decrypted, err := ff1.Decrypt(ciphertext) + if err != nil { + t.Fatalf("%v", err) + } + + if plaintext != decrypted { + t.Fatalf("TestUnicode Decrypt Failed. \n Expected: %v \n Got: %v \n", plaintext, decrypted) + } +} + +func TestAlphabetSizes(t *testing.T) { + // encryption deals with numeral values encoded in ceil(log(radix))-sized + // bit strings, up to 16 bits in length - the number of bits in a uint16. + // This test exercises behaviour for all bitstring lengths from 1 to 16. 
+ + key, _ := hex.DecodeString("EF4359D8D580AA4F7F036D6F04FC6A94") + + tweak, _ := hex.DecodeString("D8E7920AFA330A73") + + for s := uint(1); s < 17; s++ { + a, err := buildAlphabet(1 << s) + if err != nil { + t.Fatalf("TestAlphabetSizes: %s", err) + } + + ff1, err := NewCipherWithAlphabet(a, 8, key, tweak) + if err != nil { + t.Fatalf("Unable to create cipher: %v", err) + } + + plaintext := strings.Repeat(string(rune(0)), 10) + ciphertext, err := ff1.Encrypt(plaintext) + if err != nil { + t.Fatalf("%v", err) + } + + decrypted, err := ff1.Decrypt(ciphertext) + if err != nil { + t.Fatalf("%v", err) + } + + if plaintext != decrypted { + t.Fatalf("TestUnicode Decrypt Failed. \n Expected: %v \n Got: %v \n", plaintext, decrypted) + } + + } + +} + +func buildAlphabet(n int) (string, error) { + // Not every code-point can be encoded as utf-8 string. + // For example u+DC00 - u+DFFF contains "isolated surrogate code points" + // that have no string interpretation. + // (https://www.unicode.org/charts/PDF/UDC00.pdf) + // + // Loop through a large number of code points and collect + // up to n code points with valid interpretations. + var alphabet bytes.Buffer + nr := 0 + for i := 0; i < 100000; i++ { + if utf8.ValidRune(rune(i)) { + s := string(rune(i)) + nr++ + alphabet.WriteString(s) + if nr == n { + return alphabet.String(), nil + } + } + } + return alphabet.String(), fmt.Errorf("Failed to collect %d validrunes: only %d collected", n, nr) +} + // Note: panic(err) is just used for example purposes. func ExampleCipher_Encrypt() { // Key and tweak should be byte arrays. Put your key and tweak here. 
diff --git a/ff3/ff3.go b/ff3/ff3.go index ad5cbdd..a72eb36 100644 --- a/ff3/ff3.go +++ b/ff3/ff3.go @@ -25,6 +25,8 @@ import ( "crypto/aes" "crypto/cipher" "errors" + "fmt" + "github.com/capitalone/fpe/fpeUtils" "math" "math/big" ) @@ -51,7 +53,7 @@ var ( // using a particular key, radix, and tweak type Cipher struct { tweak []byte - radix int + codec fpeUtils.Codec minLen uint32 maxLen uint32 @@ -59,9 +61,19 @@ type Cipher struct { aesBlock cipher.Block } -// NewCipher initializes a new FF3 Cipher for encryption or decryption use -// based on the radix, key and tweak parameters. +const ( + // from func (*big.Int)SetString + legacyAlphabet = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +) + +// NewCipher is provided for backwards compatibility for old client code. func NewCipher(radix int, key []byte, tweak []byte) (Cipher, error) { + return NewCipherWithAlphabet(legacyAlphabet[:radix], key, tweak) +} + +// NewCipherWithAlphabet initializes a new FF3 Cipher for encryption or decryption use +// based on the alphabet, key and tweak parameters. +func NewCipherWithAlphabet(alphabet string, key []byte, tweak []byte) (Cipher, error) { var newCipher Cipher keyLen := len(key) @@ -71,9 +83,16 @@ func NewCipher(radix int, key []byte, tweak []byte) (Cipher, error) { return newCipher, errors.New("key length must be 128, 192, or 256 bits") } - // While FF3 allows radices in [2, 2^16], there is a practical limit to 36 (alphanumeric) because the Go math/big library only supports up to base 36. 
- if (radix < 2) || (radix > big.MaxBase) { - return newCipher, errors.New("radix must be between 2 and 36, inclusive") + codec, err := fpeUtils.NewCodec(alphabet) + if err != nil { + return newCipher, fmt.Errorf("error making codec: %s", err) + } + + radix := codec.Radix() + + // FF3 allows radices in [2, 2^16] + if (radix < 2) || (radix > 65536) { + return newCipher, errors.New("radix must be between 2 and 65536, inclusive") } // Make sure the given the length of tweak in bits is 64 @@ -86,8 +105,8 @@ func NewCipher(radix int, key []byte, tweak []byte) (Cipher, error) { maxLen := uint32(math.Floor((192 / math.Log2(float64(radix))))) - // Make sure 2 <= minLength <= maxLength < 2*floor(log base radix of 2^96) is satisfied - if (minLen < 2) || (maxLen < minLen) || (float64(maxLen) > (192 / math.Log2(float64(radix)))) { + // Make sure minLength <= maxLength < 2*floor(log base radix of 2^96) is satisfied + if (maxLen < minLen) || (float64(maxLen) > (192 / math.Log2(float64(radix)))) { return newCipher, errors.New("minLen or maxLen invalid, adjust your radix") } @@ -99,7 +118,7 @@ func NewCipher(radix int, key []byte, tweak []byte) (Cipher, error) { } newCipher.tweak = tweak - newCipher.radix = radix + newCipher.codec = codec newCipher.minLen = minLen newCipher.maxLen = maxLen newCipher.aesBlock = aesBlock @@ -120,9 +139,16 @@ func (c Cipher) Encrypt(X string) (string, error) { // use-case of FPE for things like credit card numbers. func (c Cipher) EncryptWithTweak(X string, tweak []byte) (string, error) { var ret string - var ok bool - n := uint32(len(X)) + // String X contains a sequence of characters, where some characters + // might take up multiple bytes. Convert into an array of indices into + // the alphabet embedded in the codec. + Xn, err := c.codec.Encode(X) + if err != nil { + return ret, ErrStringNotInRadix + } + + n := uint32(len(Xn)) // Check if message length is within minLength and maxLength bounds // TODO BUG: when n==c.maxLen, it breaks. 
For now, I'm changing @@ -136,22 +162,18 @@ func (c Cipher) EncryptWithTweak(X string, tweak []byte) (string, error) { return ret, ErrTweakLengthInvalid } - radix := c.radix - - // Check if the message is in the current radix - var numX big.Int - _, ok = numX.SetString(X, radix) - if !ok { - return ret, ErrStringNotInRadix - } + radix := c.codec.Radix() // Calculate split point u := uint32(math.Ceil(float64(n) / 2)) v := n - u // Split the message - A := X[:u] - B := X[u:] + A := Xn[:u] + B := Xn[u:] + + // C must be large enough to hold either A or B + C := make([]uint16, u) // Split the tweak Tl := tweak[:halfTweakLen] @@ -200,8 +222,8 @@ func (c Cipher) EncryptWithTweak(X string, tweak []byte) (string, error) { P[3] = W[3] ^ byte(i) // The remaining 12 bytes of P are for rev(B) with padding - _, ok = numB.SetString(rev(B), radix) - if !ok { + numB, err = fpeUtils.NumRev(B, uint64(radix)) + if err != nil { return ret, ErrStringNotInRadix } @@ -225,8 +247,8 @@ func (c Cipher) EncryptWithTweak(X string, tweak []byte) (string, error) { numY.SetBytes(S[:]) // Calculate c - _, ok = numC.SetString(rev(A), radix) - if !ok { + numC, err = fpeUtils.NumRev(A, uint64(radix)) + if err != nil { return ret, ErrStringNotInRadix } @@ -238,22 +260,29 @@ func (c Cipher) EncryptWithTweak(X string, tweak []byte) (string, error) { numC.Mod(&numC, &numModV) } - C := numC.Text(c.radix) - - // Need to pad the text with leading 0s first to make sure it's the correct length - for len(C) < int(m) { - C = "0" + C + C = C[:m] + _, err := fpeUtils.StrRev(&numC, C, uint64(c.codec.Radix())) + if err != nil { + return "", err } - C = rev(C) // Final steps - A = B - B = C + A, B, C = B, C, A + } + + // convert the numeral arrays back to strings + strA, err := c.codec.Decode(A) + if err != nil { + return "", err } - ret = A + B + strB, err := c.codec.Decode(B) + if err != nil { + return "", err + } + + return strA + strB, nil - return ret, nil } // Decrypt decrypts the string X over the current 
FF3 parameters @@ -269,9 +298,16 @@ func (c Cipher) Decrypt(X string) (string, error) { // use-case of FPE for things like credit card numbers. func (c Cipher) DecryptWithTweak(X string, tweak []byte) (string, error) { var ret string - var ok bool - n := uint32(len(X)) + // String X contains a sequence of characters, where some characters + // might take up multiple bytes. Convert into an array of indices into + // the alphabet embedded in the codec. + Xn, err := c.codec.Encode(X) + if err != nil { + return ret, ErrStringNotInRadix + } + + n := uint32(len(Xn)) // Check if message length is within minLength and maxLength bounds // TODO BUG: when n==c.maxLen, it breaks. For now, I'm changing @@ -285,22 +321,18 @@ func (c Cipher) DecryptWithTweak(X string, tweak []byte) (string, error) { return ret, ErrTweakLengthInvalid } - radix := c.radix - - // Check if the message is in the current radix - var numX big.Int - _, ok = numX.SetString(X, radix) - if !ok { - return ret, ErrStringNotInRadix - } + radix := c.codec.Radix() // Calculate split point u := uint32(math.Ceil(float64(n) / 2)) v := n - u // Split the message - A := X[:u] - B := X[u:] + A := Xn[:u] + B := Xn[u:] + + // C must be large enough to hold either A or B + C := make([]uint16, u) // Split the tweak Tl := tweak[:halfTweakLen] @@ -349,8 +381,8 @@ func (c Cipher) DecryptWithTweak(X string, tweak []byte) (string, error) { P[3] = W[3] ^ byte(i) // The remaining 12 bytes of P are for rev(A) with padding - _, ok = numA.SetString(rev(A), radix) - if !ok { + numA, err = fpeUtils.NumRev(A, uint64(radix)) + if err != nil { return ret, ErrStringNotInRadix } @@ -373,12 +405,9 @@ func (c Cipher) DecryptWithTweak(X string, tweak []byte) (string, error) { // Calculate numY numY.SetBytes(S[:]) - // Calculate numY - numY.SetBytes(S[:]) - // Calculate c - _, ok = numC.SetString(rev(B), radix) - if !ok { + numC, err = fpeUtils.NumRev(B, uint64(radix)) + if err != nil { return ret, ErrStringNotInRadix } @@ -390,20 +419,28 @@ 
func (c Cipher) DecryptWithTweak(X string, tweak []byte) (string, error) { numC.Mod(&numC, &numModV) } - C := numC.Text(c.radix) - - // Need to pad the text with leading 0s first to make sure it's the correct length - for len(C) < int(m) { - C = "0" + C + C = C[:m] + _, err := fpeUtils.StrRev(&numC, C, uint64(c.codec.Radix())) + if err != nil { + return "", err } - C = rev(C) // Final steps - B = A - A = C + B, A, C = A, C, B + } + + // convert the numeral arrays back to strings + strA, err := c.codec.Decode(A) + if err != nil { + return "", err + } + + strB, err := c.codec.Decode(B) + if err != nil { + return "", err } - return A + B, nil + return strA + strB, nil } // rev reverses a string diff --git a/ff3/ff3_test.go b/ff3/ff3_test.go index ea32b33..de0b23f 100644 --- a/ff3/ff3_test.go +++ b/ff3/ff3_test.go @@ -20,9 +20,12 @@ See the License for the specific language governing permissions and limitations package ff3 import ( + "bytes" "encoding/hex" "fmt" + "strings" "testing" + "unicode/utf8" ) // Test vectors taken from here: http://csrc.nist.gov/groups/ST/toolkit/documents/Examples/FF3samples.pdf @@ -204,6 +207,68 @@ func TestDecrypt(t *testing.T) { } } +func TestAlphabetSizes(t *testing.T) { + // encryption deals with numeral values encoded in ceil(log(radix))-sized + // bit strings, up to 16 bits in length - the number of bits in a uint16. + // This test exercises behaviour for all bitstring lengths from 1 to 16. 
+ + key, _ := hex.DecodeString("EF4359D8D580AA4F7F036D6F04FC6A94") + + tweak, _ := hex.DecodeString("D8E7920AFA330A73") + + for s := uint(1); s < 17; s++ { + a, err := buildAlphabet(1 << s) + if err != nil { + t.Fatalf("TestAlphabetSizes: %s", err) + } + + ff3, err := NewCipherWithAlphabet(a, key, tweak) + if err != nil { + t.Fatalf("Unable to create cipher: %v", err) + } + + plaintext := strings.Repeat(string(rune(0)), 10) + ciphertext, err := ff3.Encrypt(plaintext) + if err != nil { + t.Fatalf("%v", err) + } + + decrypted, err := ff3.Decrypt(ciphertext) + if err != nil { + t.Fatalf("%v", err) + } + + if plaintext != decrypted { + t.Fatalf("TestUnicode Decrypt Failed. \n Expected: %v \n Got: %v \n", plaintext, decrypted) + } + + } + +} + +func buildAlphabet(n int) (string, error) { + // Not every code-point can be encoded as utf-8 string. + // For example u+DC00 - u+DFFF contains "isolated surrogate code points" + // that have no string interpretation. + // (https://www.unicode.org/charts/PDF/UDC00.pdf) + // + // Loop through a large number of code points and collect + // up to n code points with valid interpretations. + var alphabet bytes.Buffer + nr := 0 + for i := 0; i < 100000; i++ { + if utf8.ValidRune(rune(i)) { + s := string(rune(i)) + nr++ + alphabet.WriteString(s) + if nr == n { + return alphabet.String(), nil + } + } + } + return alphabet.String(), fmt.Errorf("Failed to collect %d validrunes: only %d collected", n, nr) +} + // Note: panic(err) is just used for example purposes. func ExampleCipher_Encrypt() { // Key and tweak should be byte arrays. Put your key and tweak here. 
diff --git a/fpeUtils/codec.go b/fpeUtils/codec.go new file mode 100644 index 0000000..96d3940 --- /dev/null +++ b/fpeUtils/codec.go @@ -0,0 +1,108 @@ +/* + +SPDX-Copyright: Copyright (c) Capital One Services, LLC +SPDX-License-Identifier: Apache-2.0 +Copyright 2017 Capital One Services, LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +*/ + +// Package fpeUtils provides some encoding helpers for use +// in the FF1 and FF3 format-preserving encryption packages. +package fpeUtils + +import ( + "bytes" + "fmt" + "unicode/utf8" +) + +// Codec supports the conversion of an arbitrary alphabet into ordinal +// values from 0 to length of alphabet-1. +// Element 'rtu' (rune-to-uint16) supports the mapping from runes to ordinal values. +// Element 'utr' (uint16-to-rune) supports the mapping from ordinal values to runes. +type Codec struct { + rtu map[rune]uint16 + utr []rune +} + +// NewCodec builds a Codec from the set of unique characters taken from the string s. +// The string contains arbitrary UTF-8 characters. +// It is an error to try to construct a codec from an alphabet with more than 65536 unique characters. +func NewCodec(s string) (Codec, error) { + var ret Codec + ret.rtu = make(map[rune]uint16) + ret.utr = make([]rune, utf8.RuneCountInString(s)) + + var i uint32 + for _, rv := range s { + // duplicates are tolerated, but ignored. 
+ if _, ok := ret.rtu[rv]; !ok { + ret.utr[i] = rv + ret.rtu[rv] = uint16(i) + if i == 65536 { + return ret, fmt.Errorf("alphabet must contain no more than 65536 characters") + } + i++ + } + } + // shrink utr to unique characters + ret.utr = ret.utr[0:i] + return ret, nil +} + +// Radix returns the size of the alphabet supported by the Codec. +func (a *Codec) Radix() int { + return len(a.utr) +} + +// Encode the supplied string as an array of ordinal values giving the +// position of each character in the alphabet. +// It is an error for the supplied string to contain characters that are not +// in the alphabet. +func (a *Codec) Encode(s string) ([]uint16, error) { + n := utf8.RuneCountInString(s) + c := n + if n%2 == 1 { + // ensure the numeral array has even-sized capacity for FF3 + c++ + } + ret := make([]uint16, n, c) + + var ok bool + i := 0 + for _, rv := range s { + ret[i], ok = a.rtu[rv] + if !ok { + return ret, fmt.Errorf("character at position %d is not in alphabet", i) + } + i++ + } + return ret, nil +} + +// Decode constructs a string from an array of ordinal values where each +// value specifies the position of the character in the alphabet. +// It is an error for the array to contain values outside the boundary of the +// alphabet. 
+func (a *Codec) Decode(n []uint16) (string, error) { + var b bytes.Buffer + + for i, v := range n { + if int(v) >= len(a.utr) { + return "", fmt.Errorf("numeral at position %d out of range: %d not in [0..%d]", i, v, len(a.utr)-1) + } + b.WriteRune(a.utr[v]) + } + return b.String(), nil +} diff --git a/fpeUtils/codec_test.go b/fpeUtils/codec_test.go new file mode 100644 index 0000000..9c37a4a --- /dev/null +++ b/fpeUtils/codec_test.go @@ -0,0 +1,163 @@ +/* + +SPDX-Copyright: Copyright (c) Capital One Services, LLC +SPDX-License-Identifier: Apache-2.0 +Copyright 2017 Capital One Services, LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +*/ + +// Package fpeUtils provides some encoding helpers for use +// in the FF1 and FF3 format-preserving encryption packages. 
+package fpeUtils + +import ( + "bytes" + "fmt" + "reflect" + "testing" + "unicode/utf8" +) + +var testCodec = []struct { + alphabet string + radix int + input string + output []uint16 + error bool +}{ + { + "0123456789abcdefghijklmnopqrstuvwxyz ", + 37, + "hello world", + []uint16{17, 14, 21, 21, 24, 36, 32, 24, 27, 21, 13}, + false, + }, + { + "hello world", + 8, + "hello world", + []uint16{0, 1, 2, 2, 3, 4, 5, 3, 6, 2, 7}, + false, + }, + { + "hello world\u2318-", + 10, + "\u2318 - hello world", + []uint16{8, 4, 9, 4, 0, 1, 2, 2, 3, 4, 5, 3, 6, 2, 7}, + false, + }, +} + +func TestCodec(t *testing.T) { + for idx, spec := range testCodec { + sampleNumber := idx + 1 + t.Run(fmt.Sprintf("Sample%d", sampleNumber), func(t *testing.T) { + al, err := NewCodec(spec.alphabet) + if err != nil { + t.Fatalf("Error making codec: %s", err) + } + if al.Radix() != spec.radix { + t.Fatalf("Incorrect radix %d - expected %d", al.Radix(), spec.radix) + } + + es, err := al.Encode(spec.input) + if err != nil { + t.Fatalf("Unable to encode '%s' using alphabet '%s': %s", spec.input, spec.alphabet, err) + } + + if !reflect.DeepEqual(spec.output, es) { + t.Fatalf("Encode output incorrect: %v", es) + } + + s, err := al.Decode(es) + if err != nil { + t.Fatalf("Unable to decode: %s", err) + } + + if s != spec.input { + t.Fatalf("Decode error: got '%s' expected '%s'", s, spec.input) + } + }) + } +} + +func TestEncoder(t *testing.T) { + tests := []struct { + alphabet string + radix int + input string + }{ + { + "", + 0, + "hello world", + }, + { + "helloworld", + 7, + "hello world", + }, + } + + for idx, spec := range tests { + t.Run(fmt.Sprintf("Sample%d", idx+1), func(t *testing.T) { + al, err := NewCodec(spec.alphabet) + if err != nil { + t.Fatalf("Error making codec: %s", err) + } + if al.Radix() != spec.radix { + t.Fatalf("Incorrect radix %d - expected %d", al.Radix(), spec.radix) + } + + _, err = al.Encode(spec.input) + if err == nil { + t.Fatalf("Encode unexpectedly succeeded: input 
'%s', alphabet '%s'", spec.input, spec.alphabet) + } + }) + } +} + +func TestLargeAlphabet(t *testing.T) { + var alphabet bytes.Buffer + + nr := 0 + for i := 0; i < 100000; i++ { + if utf8.ValidRune(rune(i)) { + s := string(rune(i)) + nr++ + alphabet.WriteString(s) + if nr == 65536 { + break + } + } + } + + al, err := NewCodec(alphabet.String()) + if err != nil { + t.Fatalf("Error making codec: %s", err) + } + if al.Radix() != 65536 { + t.Fatalf("Incorrect radix %d ", al.Radix()) + } + + nml, err := al.Encode("hello world") + if err != nil { + t.Fatalf("Unable to encode: %s", err) + } + + _, err = al.Decode(nml) + if err != nil { + t.Fatalf("Unable to decode: %s", err) + } +} diff --git a/fpeUtils/numeral.go b/fpeUtils/numeral.go new file mode 100644 index 0000000..633e0c8 --- /dev/null +++ b/fpeUtils/numeral.go @@ -0,0 +1,132 @@ +/* + +SPDX-Copyright: Copyright (c) Capital One Services, LLC +SPDX-License-Identifier: Apache-2.0 +Copyright 2017 Capital One Services, LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +*/ + +// Package fpeUtils provides some encoding helpers for use +// in the FF1 and FF3 format-preserving encryption packages. +package fpeUtils + +import ( + "fmt" + "math/big" +) + +// Num constructs a big.Int from an array of uint16, where each element represents +// one digit in the given radix. The array is arranged with the most significant digit in element 0, +// down to the least significant digit in element len-1. 
// Num returns an error when radix lies outside [2, 65536] or when a digit is
// not smaller than radix; the returned big.Int is zero whenever err is non-nil.
func Num(s []uint16, radix uint64) (big.Int, error) {
	// A radix of 0 would make radix-1 wrap around and silently disable the
	// per-digit range check below, so small radices are rejected up front.
	if radix < 2 {
		return big.Int{}, fmt.Errorf("Radix (%d) too small: min supported radix is 2", radix)
	}
	if radix > 65536 {
		return big.Int{}, fmt.Errorf("Radix (%d) too big: max supported radix is 65536", radix)
	}

	var bigRadix, digit, x big.Int
	maxv := uint16(radix - 1)
	bigRadix.SetUint64(radix)
	// Horner evaluation: x = x*radix + digit, most significant digit first.
	for i, v := range s {
		if v > maxv {
			return big.Int{}, fmt.Errorf("Value at %d out of range: got %d - expected 0..%d", i, v, maxv)
		}
		digit.SetUint64(uint64(v))
		x.Mul(&x, &bigRadix)
		x.Add(&x, &digit)
	}
	return x, nil
}

// NumRev constructs a big.Int from an array of uint16, where each element represents
// one digit in the given radix. The array is arranged with the least significant digit in element 0,
// up to the most significant digit in element len-1.
// NumRev returns an error when radix lies outside [2, 65536] or when a digit is
// not smaller than radix; the returned big.Int is zero whenever err is non-nil.
func NumRev(s []uint16, radix uint64) (big.Int, error) {
	// See Num: radix 0 would defeat the digit range check via wrap-around.
	if radix < 2 {
		return big.Int{}, fmt.Errorf("Radix (%d) too small: min supported radix is 2", radix)
	}
	if radix > 65536 {
		return big.Int{}, fmt.Errorf("Radix (%d) too big: max supported radix is 65536", radix)
	}

	var bigRadix, digit, x big.Int
	maxv := uint16(radix - 1)
	bigRadix.SetUint64(radix)
	// The most significant digit sits at the end of s, so walk backwards.
	for i := len(s) - 1; i >= 0; i-- {
		if s[i] > maxv {
			return big.Int{}, fmt.Errorf("Value at %d out of range: got %d - expected 0..%d", i, s[i], maxv)
		}
		digit.SetUint64(uint64(s[i]))
		x.Mul(&x, &bigRadix)
		x.Add(&x, &digit)
	}
	return x, nil
}

// Str populates an array of uint16 with digits representing big.Int x in the specified radix.
// The array is arranged with the most significant digit in element 0.
// The array is built from big.Int x from the least significant digit upwards. If the supplied
// array is too short, it receives only the least significant digits of x and an error is returned.
// Str returns r together with an error when radix lies outside [2, 65536] or
// when x does not fit in len(r) digits.
func Str(x *big.Int, r []uint16, radix uint64) ([]uint16, error) {
	// Radix 0 must be rejected before the loop: DivMod panics on a zero divisor.
	if radix < 2 {
		return r, fmt.Errorf("Radix (%d) too small: min supported radix is 2", radix)
	}
	if radix > 65536 {
		return r, fmt.Errorf("Radix (%d) too big: max supported radix is 65536", radix)
	}

	var bigRadix, mod, v big.Int
	v.Set(x) // work on a copy so the caller's x is left untouched
	bigRadix.SetUint64(radix)
	// Peel off digits from the least significant end, filling r from the back
	// so the most significant digit lands in r[0].
	for i := len(r) - 1; i >= 0; i-- {
		v.DivMod(&v, &bigRadix, &mod)
		r[i] = uint16(mod.Uint64())
	}
	// Anything left over means x needed more than len(r) digits. A negative x
	// never reduces to zero under Euclidean division, so it ends up here too.
	if v.Sign() != 0 {
		return r, fmt.Errorf("destination array too small: %s remains after conversion", &v)
	}
	return r, nil
}

// StrRev populates an array of uint16 with digits representing big.Int x in the specified radix.
// The array is arranged with the least significant digit in element 0.
// The array is built from big.Int x from the least significant digit upwards. If the supplied
// array is too short, it receives only the least significant digits of x and an error is returned.
// An error is also returned when radix lies outside [2, 65536].
func StrRev(x *big.Int, r []uint16, radix uint64) ([]uint16, error) {
	// Radix 0 must be rejected before the loop: DivMod panics on a zero divisor.
	if radix < 2 {
		return r, fmt.Errorf("Radix (%d) too small: min supported radix is 2", radix)
	}
	if radix > 65536 {
		return r, fmt.Errorf("Radix (%d) too big: max supported radix is 65536", radix)
	}

	var bigRadix, mod, v big.Int
	v.Set(x) // work on a copy so the caller's x is left untouched
	bigRadix.SetUint64(radix)
	// Digits come out least significant first, which is already the target order.
	for i := range r {
		v.DivMod(&v, &bigRadix, &mod)
		r[i] = uint16(mod.Uint64())
	}
	// Anything left over means x needed more than len(r) digits.
	if v.Sign() != 0 {
		return r, fmt.Errorf("destination array too small: %s remains after conversion", &v)
	}
	return r, nil
}

// DecodeNum constructs a string from indices into the alphabet embedded in the Codec. The indices
// are encoded in the big Ints a and b.
// lenA and lenB are the number of characters that should be built from the corresponding big Ints.
+func DecodeNum(a *big.Int, lenA int, b *big.Int, lenB int, c Codec) (string, error) { + ret := make([]uint16, lenA+lenB) + _, err := Str(a, ret[:lenA], uint64(c.Radix())) + if err != nil { + return "", err + } + _, err = Str(b, ret[lenA:], uint64(c.Radix())) + if err != nil { + return "", err + } + return c.Decode(ret) +} diff --git a/fpeUtils/numeral_test.go b/fpeUtils/numeral_test.go new file mode 100644 index 0000000..bc18c29 --- /dev/null +++ b/fpeUtils/numeral_test.go @@ -0,0 +1,129 @@ +/* + +SPDX-Copyright: Copyright (c) Capital One Services, LLC +SPDX-License-Identifier: Apache-2.0 +Copyright 2017 Capital One Services, LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +*/ + +// Package fpeUtils provides some encoding helpers for use +// in the FF1 and FF3 format-preserving encryption packages. 
+package fpeUtils + +import ( + "fmt" + "math/big" + "reflect" + "testing" +) + +func TestEncode(t *testing.T) { + + testSpec := []struct { + radix uint64 + intv *big.Int + numeral []uint16 + }{ + { + 10, + big.NewInt(100), + []uint16{1, 0, 0}, + }, + { + 65536, + big.NewInt(0).Exp(big.NewInt(65536), big.NewInt(7), nil), + []uint16{1, 0, 0, 0, 0, 0, 0, 0}, + }, + } + + for idx, spec := range testSpec { + sampleNumber := idx + 1 + t.Run(fmt.Sprintf("Sample%d", sampleNumber), func(t *testing.T) { + v, err := Num(spec.numeral, spec.radix) + if err != nil { + t.Fatalf("error in Num: %s", err) + } + if v.Cmp(spec.intv) != 0 { + t.Fatalf("expected %v got %v", spec.intv, &v) + } + r := make([]uint16, len(spec.numeral)) + Str(&v, r, spec.radix) + if !reflect.DeepEqual(spec.numeral, r) { + t.Fatalf("Encode numeral incorrect: %v", r) + } + + }) + } +} + +func TestEncodeError(t *testing.T) { + + testSpec := []struct { + radix uint64 + intv *big.Int + numeral []uint16 + }{ + { + 10, + big.NewInt(100), + []uint16{10, 0, 0}, + }, + { + 65537, + big.NewInt(0).Exp(big.NewInt(65537), big.NewInt(7), nil), + []uint16{1, 0, 0, 0, 0, 0, 0, 0}, + }, + } + + for idx, spec := range testSpec { + sampleNumber := idx + 1 + t.Run(fmt.Sprintf("Sample%d", sampleNumber), func(t *testing.T) { + _, err := Num(spec.numeral, spec.radix) + if err == nil { + t.Fatalf("expected error in Num") + } + }) + } +} + +func TestDecodeError(t *testing.T) { + + testSpec := []struct { + radix uint64 + intv *big.Int + numeral []uint16 + }{ + { + 10, + big.NewInt(100), + []uint16{1, 0, 0}, + }, + { + 65537, + big.NewInt(0).Exp(big.NewInt(65537), big.NewInt(7), nil), + []uint16{1, 0, 0, 0, 0, 0, 0, 0}, + }, + } + + for idx, spec := range testSpec { + sampleNumber := idx + 1 + t.Run(fmt.Sprintf("Sample%d", sampleNumber), func(t *testing.T) { + r := make([]uint16, 2) + _, err := Str(spec.intv, r, spec.radix) + if err == nil { + t.Fatalf("expected error in Str") + } + }) + } +}