From 5b9ea3df0dc355d77b9f061f63064614a97b8b67 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sun, 22 Dec 2019 16:43:50 +0200 Subject: [PATCH 01/27] cipher: fix typo in error log * cipher/cipher.c (_gcry_cipher_encrypt): Fix log "cipher_decrypt: ..." to "cipher_encrypt: ...". -- Signed-off-by: Jussi Kivilinna --- cipher/cipher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cipher/cipher.c b/cipher/cipher.c index ab3e4240..bd571367 100644 --- a/cipher/cipher.c +++ b/cipher/cipher.c @@ -1125,7 +1125,7 @@ _gcry_cipher_encrypt (gcry_cipher_hd_t h, void *out, size_t outsize, if (h->mode != GCRY_CIPHER_MODE_NONE && !h->marks.key) { - log_error ("cipher_decrypt: key not set\n"); + log_error ("cipher_encrypt: key not set\n"); return GPG_ERR_MISSING_KEY; } From 0837d7e6be3e604c1f7b86d18c582d8aa7ed858c Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Mon, 23 Dec 2019 03:39:46 +0200 Subject: [PATCH 02/27] rijndael-ppc: fix bad register used for vector load/store assembly * cipher/rijndael-ppc.c (vec_aligned_ld, vec_load_be, vec_aligned_st) (vec_store_be): Add "r0" to clobber list for load/store instructions. -- Register r0 must not be used for RA input for vector load/store instructions as r0 is not read as register but as value '0'. 
Signed-off-by: Jussi Kivilinna --- cipher/rijndael-ppc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c index 7c349f8b..48a47edd 100644 --- a/cipher/rijndael-ppc.c +++ b/cipher/rijndael-ppc.c @@ -138,7 +138,7 @@ vec_aligned_ld(unsigned long offset, const unsigned char *ptr) __asm__ ("lvx %0,%1,%2\n\t" : "=v" (vec) : "r" (offset), "r" ((uintptr_t)ptr) - : "memory"); + : "memory", "r0"); return vec; #else return vec_vsx_ld (offset, ptr); @@ -169,7 +169,7 @@ vec_load_be(unsigned long offset, const unsigned char *ptr, __asm__ ("lxvw4x %x0,%1,%2\n\t" : "=wa" (vec) : "r" (offset), "r" ((uintptr_t)ptr) - : "memory"); + : "memory", "r0"); __asm__ ("vperm %0,%1,%1,%2\n\t" : "=v" (vec) : "v" (vec), "v" (be_bswap_const)); @@ -188,7 +188,7 @@ vec_aligned_st(block vec, unsigned long offset, unsigned char *ptr) __asm__ ("stvx %0,%1,%2\n\t" : : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr) - : "memory"); + : "memory", "r0"); #else vec_vsx_st (vec, offset, ptr); #endif @@ -208,7 +208,7 @@ vec_store_be(block vec, unsigned long offset, unsigned char *ptr, __asm__ ("stxvw4x %x0,%1,%2\n\t" : : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr) - : "memory"); + : "memory", "r0"); #else (void)be_bswap_const; vec_vsx_st (vec, offset, ptr); From 110077505acacae62cec3d09b32a084b9cee0368 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sun, 22 Dec 2019 16:44:07 +0200 Subject: [PATCH 03/27] rijndael-ppc: performance improvements * cipher/rijndael-ppc.c (ALIGNED_LOAD, ALIGNED_STORE, VEC_LOAD_BE) (VEC_STORE_BE): Rewrite. (VEC_BE_SWAP, VEC_LOAD_BE_NOSWAP, VEC_STORE_BE_NOSWAP): New. (PRELOAD_ROUND_KEYS, AES_ENCRYPT, AES_DECRYPT): Adjust to new input parameters for vector load macros. (ROUND_KEY_VARIABLES_ALL, PRELOAD_ROUND_KEYS_ALL) (AES_ENCRYPT_ALL): New. (vec_bswap32_const_neg): New. (vec_aligned_ld, vec_aligned_st, vec_load_be_const): Rename to... (asm_aligned_ls, asm_aligned_st, asm_load_be_const): ...these. 
(asm_be_swap, asm_vperm1, asm_load_be_noswap) (asm_store_be_noswap): New. (vec_add_uint128): Rename to... (asm_add_uint128): ...this. (asm_xor, asm_cipher_be, asm_cipherlast_be, asm_ncipher_be) (asm_ncipherlast_be): New inline assembly functions with volatile keyword to allow manual instruction ordering. (_gcry_aes_ppc8_setkey, aes_ppc8_prepare_decryption) (_gcry_aes_ppc8_encrypt, _gcry_aes_ppc8_decrypt) (_gcry_aes_ppc8_cfb_enc, _gcry_aes_ppc8_cbc_enc) (_gcry_aes_ppc8_ocb_auth): Update to use new&rewritten helper macros. (_gcry_aes_ppc8_cfb_dec, _gcry_aes_ppc8_cbc_dec) (_gcry_aes_ppc8_ctr_enc, _gcry_aes_ppc8_ocb_crypt) (_gcry_aes_ppc8_xts_crypt): Update to use new&rewritten helper macros; Tune 8-block parallel paths with manual instruction ordering. -- Benchmarks on POWER8 (ppc64le, ~3.8Ghz): Before: AES | nanosecs/byte mebibytes/sec cycles/byte CBC enc | 1.06 ns/B 902.2 MiB/s 4.02 c/B CBC dec | 0.208 ns/B 4585 MiB/s 0.790 c/B CFB enc | 1.06 ns/B 900.4 MiB/s 4.02 c/B CFB dec | 0.208 ns/B 4588 MiB/s 0.790 c/B CTR enc | 0.238 ns/B 4007 MiB/s 0.904 c/B CTR dec | 0.238 ns/B 4009 MiB/s 0.904 c/B XTS enc | 0.492 ns/B 1937 MiB/s 1.87 c/B XTS dec | 0.488 ns/B 1955 MiB/s 1.85 c/B OCB enc | 0.243 ns/B 3928 MiB/s 0.922 c/B OCB dec | 0.247 ns/B 3858 MiB/s 0.939 c/B OCB auth | 0.213 ns/B 4482 MiB/s 0.809 c/B After (cbc-dec & cfb-dec & xts & ocb ~6% faster, ctr ~11% faster): AES | nanosecs/byte mebibytes/sec cycles/byte CBC enc | 1.06 ns/B 902.1 MiB/s 4.02 c/B CBC dec | 0.196 ns/B 4877 MiB/s 0.743 c/B CFB enc | 1.06 ns/B 902.2 MiB/s 4.02 c/B CFB dec | 0.195 ns/B 4889 MiB/s 0.741 c/B CTR enc | 0.214 ns/B 4448 MiB/s 0.815 c/B CTR dec | 0.214 ns/B 4452 MiB/s 0.814 c/B XTS enc | 0.461 ns/B 2067 MiB/s 1.75 c/B XTS dec | 0.456 ns/B 2092 MiB/s 1.73 c/B OCB enc | 0.227 ns/B 4200 MiB/s 0.863 c/B OCB dec | 0.234 ns/B 4072 MiB/s 0.890 c/B OCB auth | 0.207 ns/B 4604 MiB/s 0.787 c/B Benchmarks on POWER9 (ppc64le, ~3.8Ghz): Before: AES | nanosecs/byte mebibytes/sec cycles/byte CBC enc | 1.04 
ns/B 918.7 MiB/s 3.94 c/B CBC dec | 0.240 ns/B 3982 MiB/s 0.910 c/B CFB enc | 1.04 ns/B 917.6 MiB/s 3.95 c/B CFB dec | 0.241 ns/B 3963 MiB/s 0.914 c/B CTR enc | 0.249 ns/B 3835 MiB/s 0.945 c/B CTR dec | 0.252 ns/B 3787 MiB/s 0.957 c/B XTS enc | 0.505 ns/B 1889 MiB/s 1.92 c/B XTS dec | 0.495 ns/B 1926 MiB/s 1.88 c/B OCB enc | 0.303 ns/B 3152 MiB/s 1.15 c/B OCB dec | 0.305 ns/B 3129 MiB/s 1.16 c/B OCB auth | 0.265 ns/B 3595 MiB/s 1.01 c/B After (cbc-dec & cfb-dec ~6% faster, ctr ~11% faster, ocb ~4% faster): AES | nanosecs/byte mebibytes/sec cycles/byte CBC enc | 1.04 ns/B 917.3 MiB/s 3.95 c/B CBC dec | 0.225 ns/B 4234 MiB/s 0.856 c/B CFB enc | 1.04 ns/B 917.8 MiB/s 3.95 c/B CFB dec | 0.226 ns/B 4214 MiB/s 0.860 c/B CTR enc | 0.221 ns/B 4306 MiB/s 0.842 c/B CTR dec | 0.223 ns/B 4271 MiB/s 0.848 c/B XTS enc | 0.503 ns/B 1897 MiB/s 1.91 c/B XTS dec | 0.495 ns/B 1928 MiB/s 1.88 c/B OCB enc | 0.288 ns/B 3309 MiB/s 1.10 c/B OCB dec | 0.292 ns/B 3266 MiB/s 1.11 c/B OCB auth | 0.267 ns/B 3570 MiB/s 1.02 c/B Signed-off-by: Jussi Kivilinna --- cipher/rijndael-ppc.c | 1839 +++++++++++++++++++++++++---------------- 1 file changed, 1112 insertions(+), 727 deletions(-) diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c index 48a47edd..a8bcae46 100644 --- a/cipher/rijndael-ppc.c +++ b/cipher/rijndael-ppc.c @@ -51,17 +51,27 @@ typedef union #define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE -#define ALIGNED_LOAD(in_ptr) \ - (vec_aligned_ld (0, (const unsigned char *)(in_ptr))) +#define ALIGNED_LOAD(in_ptr, offs) \ + (asm_aligned_ld ((offs) * 16, (const void *)(in_ptr))) -#define ALIGNED_STORE(out_ptr, vec) \ - (vec_aligned_st ((vec), 0, (unsigned char *)(out_ptr))) +#define ALIGNED_STORE(out_ptr, offs, vec) \ + (asm_aligned_st ((vec), (offs) * 16, (void *)(out_ptr))) -#define VEC_LOAD_BE(in_ptr, bige_const) \ - (vec_load_be (0, (const unsigned char *)(in_ptr), bige_const)) +#define VEC_BE_SWAP(vec, bige_const) (asm_be_swap ((vec), (bige_const))) -#define 
VEC_STORE_BE(out_ptr, vec, bige_const) \ - (vec_store_be ((vec), 0, (unsigned char *)(out_ptr), bige_const)) +#define VEC_LOAD_BE(in_ptr, offs, bige_const) \ + (asm_be_swap (asm_load_be_noswap ((offs) * 16, (const void *)(in_ptr)), \ + bige_const)) + +#define VEC_LOAD_BE_NOSWAP(in_ptr, offs) \ + (asm_load_be_noswap ((offs) * 16, (const unsigned char *)(in_ptr))) + +#define VEC_STORE_BE(out_ptr, offs, vec, bige_const) \ + (asm_store_be_noswap (asm_be_swap ((vec), (bige_const)), (offs) * 16, \ + (void *)(out_ptr))) + +#define VEC_STORE_BE_NOSWAP(out_ptr, offs, vec) \ + (asm_store_be_noswap ((vec), (offs) * 16, (void *)(out_ptr))) #define ROUND_KEY_VARIABLES \ @@ -69,166 +79,257 @@ typedef union #define PRELOAD_ROUND_KEYS(nrounds) \ do { \ - rkey0 = ALIGNED_LOAD(&rk[0]); \ - rkeylast = ALIGNED_LOAD(&rk[nrounds]); \ + rkey0 = ALIGNED_LOAD (rk, 0); \ + rkeylast = ALIGNED_LOAD (rk, nrounds); \ } while (0) - #define AES_ENCRYPT(blk, nrounds) \ do { \ blk ^= rkey0; \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[1])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[2])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[3])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[4])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[5])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[6])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[7])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[8])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[9])); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 1)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 2)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 3)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 4)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 5)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 6)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 7)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 8)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 9)); \ if (nrounds >= 12) \ { \ - blk = vec_cipher_be (blk, 
ALIGNED_LOAD(&rk[10])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[11])); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 10)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 11)); \ if (rounds > 12) \ { \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[12])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[13])); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 12)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 13)); \ } \ } \ - blk = vec_cipherlast_be (blk, rkeylast); \ + blk = asm_cipherlast_be (blk, rkeylast); \ } while (0) - #define AES_DECRYPT(blk, nrounds) \ do { \ blk ^= rkey0; \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[1])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[2])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[3])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[4])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[5])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[6])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[7])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[8])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[9])); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 1)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 2)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 3)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 4)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 5)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 6)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 7)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 8)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 9)); \ if (nrounds >= 12) \ { \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[10])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[11])); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 10)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 11)); \ if (rounds > 12) \ { \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[12])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[13])); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 
12)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 13)); \ } \ } \ - blk = vec_ncipherlast_be (blk, rkeylast); \ + blk = asm_ncipherlast_be (blk, rkeylast); \ } while (0) +#define ROUND_KEY_VARIABLES_ALL \ + block rkey0, rkey1, rkey2, rkey3, rkey4, rkey5, rkey6, rkey7, rkey8, \ + rkey9, rkey10, rkey11, rkey12, rkey13, rkeylast + +#define PRELOAD_ROUND_KEYS_ALL(nrounds) \ + do { \ + rkey0 = ALIGNED_LOAD (rk, 0); \ + rkey1 = ALIGNED_LOAD (rk, 1); \ + rkey2 = ALIGNED_LOAD (rk, 2); \ + rkey3 = ALIGNED_LOAD (rk, 3); \ + rkey4 = ALIGNED_LOAD (rk, 4); \ + rkey5 = ALIGNED_LOAD (rk, 5); \ + rkey6 = ALIGNED_LOAD (rk, 6); \ + rkey7 = ALIGNED_LOAD (rk, 7); \ + rkey8 = ALIGNED_LOAD (rk, 8); \ + rkey9 = ALIGNED_LOAD (rk, 9); \ + if (nrounds >= 12) \ + { \ + rkey10 = ALIGNED_LOAD (rk, 10); \ + rkey11 = ALIGNED_LOAD (rk, 11); \ + if (rounds > 12) \ + { \ + rkey12 = ALIGNED_LOAD (rk, 12); \ + rkey13 = ALIGNED_LOAD (rk, 13); \ + } \ + } \ + rkeylast = ALIGNED_LOAD (rk, nrounds); \ + } while (0) + +#define AES_ENCRYPT_ALL(blk, nrounds) \ + do { \ + blk ^= rkey0; \ + blk = asm_cipher_be (blk, rkey1); \ + blk = asm_cipher_be (blk, rkey2); \ + blk = asm_cipher_be (blk, rkey3); \ + blk = asm_cipher_be (blk, rkey4); \ + blk = asm_cipher_be (blk, rkey5); \ + blk = asm_cipher_be (blk, rkey6); \ + blk = asm_cipher_be (blk, rkey7); \ + blk = asm_cipher_be (blk, rkey8); \ + blk = asm_cipher_be (blk, rkey9); \ + if (nrounds >= 12) \ + { \ + blk = asm_cipher_be (blk, rkey10); \ + blk = asm_cipher_be (blk, rkey11); \ + if (rounds > 12) \ + { \ + blk = asm_cipher_be (blk, rkey12); \ + blk = asm_cipher_be (blk, rkey13); \ + } \ + } \ + blk = asm_cipherlast_be (blk, rkeylast); \ + } while (0) + + +#ifdef WORDS_BIGENDIAN static const block vec_bswap32_const = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }; +#else +static const block vec_bswap32_const_neg = + { ~3, ~2, ~1, ~0, ~7, ~6, ~5, ~4, ~11, ~10, ~9, ~8, ~15, ~14, ~13, ~12 }; +#endif static ASM_FUNC_ATTR_INLINE block 
-vec_aligned_ld(unsigned long offset, const unsigned char *ptr) +asm_aligned_ld(unsigned long offset, const void *ptr) { -#ifndef WORDS_BIGENDIAN block vec; - __asm__ ("lvx %0,%1,%2\n\t" - : "=v" (vec) - : "r" (offset), "r" ((uintptr_t)ptr) - : "memory", "r0"); + __asm__ volatile ("lvx %0,%1,%2\n\t" + : "=v" (vec) + : "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); return vec; -#else - return vec_vsx_ld (offset, ptr); -#endif } +static ASM_FUNC_ATTR_INLINE void +asm_aligned_st(block vec, unsigned long offset, void *ptr) +{ + __asm__ volatile ("stvx %0,%1,%2\n\t" + : + : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); +} static ASM_FUNC_ATTR_INLINE block -vec_load_be_const(void) +asm_load_be_const(void) { #ifndef WORDS_BIGENDIAN - return ~ALIGNED_LOAD(&vec_bswap32_const); + return ALIGNED_LOAD (&vec_bswap32_const_neg, 0); #else static const block vec_dummy = { 0 }; return vec_dummy; #endif } - static ASM_FUNC_ATTR_INLINE block -vec_load_be(unsigned long offset, const unsigned char *ptr, - block be_bswap_const) +asm_vperm1(block vec, block mask) { -#ifndef WORDS_BIGENDIAN - block vec; - /* GCC vec_vsx_ld is generating two instructions on little-endian. Use - * lxvw4x directly instead. 
*/ - __asm__ ("lxvw4x %x0,%1,%2\n\t" - : "=wa" (vec) - : "r" (offset), "r" ((uintptr_t)ptr) - : "memory", "r0"); - __asm__ ("vperm %0,%1,%1,%2\n\t" - : "=v" (vec) - : "v" (vec), "v" (be_bswap_const)); - return vec; -#else - (void)be_bswap_const; - return vec_vsx_ld (offset, ptr); -#endif + block o; + __asm__ volatile ("vperm %0,%1,%1,%2\n\t" + : "=v" (o) + : "v" (vec), "v" (mask)); + return o; } - -static ASM_FUNC_ATTR_INLINE void -vec_aligned_st(block vec, unsigned long offset, unsigned char *ptr) +static ASM_FUNC_ATTR_INLINE block +asm_be_swap(block vec, block be_bswap_const) { + (void)be_bswap_const; #ifndef WORDS_BIGENDIAN - __asm__ ("stvx %0,%1,%2\n\t" - : - : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr) - : "memory", "r0"); + return asm_vperm1 (vec, be_bswap_const); #else - vec_vsx_st (vec, offset, ptr); + return vec; #endif } +static ASM_FUNC_ATTR_INLINE block +asm_load_be_noswap(unsigned long offset, const void *ptr) +{ + block vec; + __asm__ volatile ("lxvw4x %x0,%1,%2\n\t" + : "=wa" (vec) + : "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); + /* NOTE: vec needs to be be-swapped using 'asm_be_swap' by caller */ + return vec; +} static ASM_FUNC_ATTR_INLINE void -vec_store_be(block vec, unsigned long offset, unsigned char *ptr, - block be_bswap_const) +asm_store_be_noswap(block vec, unsigned long offset, void *ptr) { -#ifndef WORDS_BIGENDIAN - /* GCC vec_vsx_st is generating two instructions on little-endian. Use - * stxvw4x directly instead. 
*/ - __asm__ ("vperm %0,%1,%1,%2\n\t" - : "=v" (vec) - : "v" (vec), "v" (be_bswap_const)); - __asm__ ("stxvw4x %x0,%1,%2\n\t" - : - : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr) - : "memory", "r0"); -#else - (void)be_bswap_const; - vec_vsx_st (vec, offset, ptr); -#endif + /* NOTE: vec be-swapped using 'asm_be_swap' by caller */ + __asm__ volatile ("stxvw4x %x0,%1,%2\n\t" + : + : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); } +static ASM_FUNC_ATTR_INLINE block +asm_add_uint128(block a, block b) +{ + block res; + __asm__ volatile ("vadduqm %0,%1,%2\n\t" + : "=v" (res) + : "v" (a), "v" (b)); + return res; +} static ASM_FUNC_ATTR_INLINE block -vec_add_uint128(block a, block b) +asm_xor(block a, block b) { -#if 1 block res; - /* Use assembly as GCC (v8.3) generates slow code for vec_vadduqm. */ - __asm__ ("vadduqm %0,%1,%2\n\t" - : "=v" (res) - : "v" (a), "v" (b)); + __asm__ volatile ("vxor %0,%1,%2\n\t" + : "=v" (res) + : "v" (a), "v" (b)); return res; -#else - return (block)vec_vadduqm((vector __uint128_t)a, (vector __uint128_t)b); -#endif +} + +static ASM_FUNC_ATTR_INLINE block +asm_cipher_be(block b, block rk) +{ + block o; + __asm__ volatile ("vcipher %0, %1, %2\n\t" + : "=v" (o) + : "v" (b), "v" (rk)); + return o; +} + +static ASM_FUNC_ATTR_INLINE block +asm_cipherlast_be(block b, block rk) +{ + block o; + __asm__ volatile ("vcipherlast %0, %1, %2\n\t" + : "=v" (o) + : "v" (b), "v" (rk)); + return o; +} + +static ASM_FUNC_ATTR_INLINE block +asm_ncipher_be(block b, block rk) +{ + block o; + __asm__ volatile ("vncipher %0, %1, %2\n\t" + : "=v" (o) + : "v" (b), "v" (rk)); + return o; +} + +static ASM_FUNC_ATTR_INLINE block +asm_ncipherlast_be(block b, block rk) +{ + block o; + __asm__ volatile ("vncipherlast %0, %1, %2\n\t" + : "=v" (o) + : "v" (b), "v" (rk)); + return o; } @@ -250,7 +351,7 @@ _gcry_aes_sbox4_ppc8(u32 fourbytes) void _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key) { - const block bige_const = 
vec_load_be_const(); + const block bige_const = asm_load_be_const(); union { PROPERLY_ALIGNED_TYPE dummy; @@ -345,11 +446,11 @@ _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key) for (r = 0; r <= rounds; r++) { #ifndef WORDS_BIGENDIAN - VEC_STORE_BE(&ekey[r], ALIGNED_LOAD(&ekey[r]), bige_const); + VEC_STORE_BE(ekey, r, ALIGNED_LOAD (ekey, r), bige_const); #else - block rvec = ALIGNED_LOAD(&ekey[r]); - ALIGNED_STORE(&ekey[r], - vec_perm(rvec, rvec, vec_bswap32_const)); + block rvec = ALIGNED_LOAD (ekey, r); + ALIGNED_STORE (ekey, r, + vec_perm(rvec, rvec, vec_bswap32_const)); (void)bige_const; #endif } @@ -378,7 +479,7 @@ aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx) rr = rounds; for (r = 0, rr = rounds; r <= rounds; r++, rr--) { - ALIGNED_STORE(&dkey[r], ALIGNED_LOAD(&ekey[rr])); + ALIGNED_STORE (dkey, r, ALIGNED_LOAD (ekey, rr)); } } @@ -394,18 +495,18 @@ unsigned int _gcry_aes_ppc8_encrypt (const RIJNDAEL_context *ctx, unsigned char *out, const unsigned char *in) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); const u128_t *rk = (u128_t *)&ctx->keyschenc; int rounds = ctx->rounds; ROUND_KEY_VARIABLES; block b; - b = VEC_LOAD_BE (in, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); AES_ENCRYPT (b, rounds); - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); return 0; /* does not use stack */ } @@ -415,18 +516,18 @@ unsigned int _gcry_aes_ppc8_decrypt (const RIJNDAEL_context *ctx, unsigned char *out, const unsigned char *in) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); const u128_t *rk = (u128_t *)&ctx->keyschdec; int rounds = ctx->rounds; ROUND_KEY_VARIABLES; block b; - b = VEC_LOAD_BE (in, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); AES_DECRYPT (b, rounds); - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); return 0; /* does 
not use stack */ } @@ -436,41 +537,41 @@ void _gcry_aes_ppc8_cfb_enc (void *context, unsigned char *iv_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *in = (const u128_t *)inbuf_arg; u128_t *out = (u128_t *)outbuf_arg; int rounds = ctx->rounds; - ROUND_KEY_VARIABLES; + ROUND_KEY_VARIABLES_ALL; block rkeylast_orig; block iv; - iv = VEC_LOAD_BE (iv_arg, bige_const); + iv = VEC_LOAD_BE (iv_arg, 0, bige_const); - PRELOAD_ROUND_KEYS (rounds); + PRELOAD_ROUND_KEYS_ALL (rounds); rkeylast_orig = rkeylast; for (; nblocks; nblocks--) { - rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, bige_const); + rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const); - AES_ENCRYPT (iv, rounds); + AES_ENCRYPT_ALL (iv, rounds); - VEC_STORE_BE (out, iv, bige_const); + VEC_STORE_BE (out, 0, iv, bige_const); out++; in++; } - VEC_STORE_BE (iv_arg, iv, bige_const); + VEC_STORE_BE (iv_arg, 0, iv, bige_const); } void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *in = (const u128_t *)inbuf_arg; @@ -483,7 +584,7 @@ void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, block b0, b1, b2, b3, b4, b5, b6, b7; block rkey; - iv = VEC_LOAD_BE (iv_arg, bige_const); + iv = VEC_LOAD_BE (iv_arg, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); rkeylast_orig = rkeylast; @@ -491,34 +592,42 @@ void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, for (; nblocks >= 8; nblocks -= 8) { in0 = iv; - in1 = VEC_LOAD_BE (in + 0, bige_const); - in2 = VEC_LOAD_BE (in + 1, bige_const); - in3 = VEC_LOAD_BE (in + 2, bige_const); - in4 = 
VEC_LOAD_BE (in + 3, bige_const); - in5 = VEC_LOAD_BE (in + 4, bige_const); - in6 = VEC_LOAD_BE (in + 5, bige_const); - in7 = VEC_LOAD_BE (in + 6, bige_const); - iv = VEC_LOAD_BE (in + 7, bige_const); - - b0 = rkey0 ^ in0; - b1 = rkey0 ^ in1; - b2 = rkey0 ^ in2; - b3 = rkey0 ^ in3; - b4 = rkey0 ^ in4; - b5 = rkey0 ^ in5; - b6 = rkey0 ^ in6; - b7 = rkey0 ^ in7; + in1 = VEC_LOAD_BE_NOSWAP (in, 0); + in2 = VEC_LOAD_BE_NOSWAP (in, 1); + in3 = VEC_LOAD_BE_NOSWAP (in, 2); + in4 = VEC_LOAD_BE_NOSWAP (in, 3); + in1 = VEC_BE_SWAP (in1, bige_const); + in2 = VEC_BE_SWAP (in2, bige_const); + in5 = VEC_LOAD_BE_NOSWAP (in, 4); + in6 = VEC_LOAD_BE_NOSWAP (in, 5); + in3 = VEC_BE_SWAP (in3, bige_const); + in4 = VEC_BE_SWAP (in4, bige_const); + in7 = VEC_LOAD_BE_NOSWAP (in, 6); + iv = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + in5 = VEC_BE_SWAP (in5, bige_const); + in6 = VEC_BE_SWAP (in6, bige_const); + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + in7 = VEC_BE_SWAP (in7, bige_const); + iv = VEC_BE_SWAP (iv, bige_const); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); + b4 = asm_xor (rkey0, in4); + b5 = asm_xor (rkey0, in5); + b6 = asm_xor (rkey0, in6); + b7 = asm_xor (rkey0, in7); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD(&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); \ - b4 = vec_cipher_be (b4, rkey); \ - b5 = vec_cipher_be (b5, rkey); \ - b6 = vec_cipher_be (b6, rkey); \ - b7 = vec_cipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -542,48 +651,60 @@ void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, #undef DO_ROUND - rkey = rkeylast; - 
b0 = vec_cipherlast_be (b0, rkey ^ in1); - b1 = vec_cipherlast_be (b1, rkey ^ in2); - b2 = vec_cipherlast_be (b2, rkey ^ in3); - b3 = vec_cipherlast_be (b3, rkey ^ in4); - b4 = vec_cipherlast_be (b4, rkey ^ in5); - b5 = vec_cipherlast_be (b5, rkey ^ in6); - b6 = vec_cipherlast_be (b6, rkey ^ in7); - b7 = vec_cipherlast_be (b7, rkey ^ iv); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); - - in += 8; + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + in4 = asm_xor (rkeylast, in4); + b0 = asm_cipherlast_be (b0, in1); + b1 = asm_cipherlast_be (b1, in2); + in5 = asm_xor (rkeylast, in5); + in6 = asm_xor (rkeylast, in6); + b2 = asm_cipherlast_be (b2, in3); + b3 = asm_cipherlast_be (b3, in4); + in7 = asm_xor (rkeylast, in7); + in0 = asm_xor (rkeylast, iv); + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_cipherlast_be (b4, in5); + b5 = asm_cipherlast_be (b5, in6); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_cipherlast_be (b6, in7); + b7 = asm_cipherlast_be (b7, in0); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { in0 = iv; - in1 = VEC_LOAD_BE (in + 0, bige_const); - in2 = VEC_LOAD_BE (in + 1, bige_const); - in3 = VEC_LOAD_BE (in + 2, bige_const); - iv 
= VEC_LOAD_BE (in + 3, bige_const); + in1 = VEC_LOAD_BE (in, 0, bige_const); + in2 = VEC_LOAD_BE (in, 1, bige_const); + in3 = VEC_LOAD_BE (in, 2, bige_const); + iv = VEC_LOAD_BE (in, 3, bige_const); - b0 = rkey0 ^ in0; - b1 = rkey0 ^ in1; - b2 = rkey0 ^ in2; - b3 = rkey0 ^ in3; + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD(&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -607,16 +728,18 @@ void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey ^ in1); - b1 = vec_cipherlast_be (b1, rkey ^ in2); - b2 = vec_cipherlast_be (b2, rkey ^ in3); - b3 = vec_cipherlast_be (b3, rkey ^ iv); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + in0 = asm_xor (rkeylast, iv); + b0 = asm_cipherlast_be (b0, in1); + b1 = asm_cipherlast_be (b1, in2); + b2 = asm_cipherlast_be (b2, in3); + b3 = asm_cipherlast_be (b3, in0); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; @@ -625,20 +748,20 @@ void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, for (; nblocks; nblocks--) { - bin = VEC_LOAD_BE (in, bige_const); + bin = VEC_LOAD_BE (in, 0, bige_const); rkeylast = rkeylast_orig ^ bin; b = iv; iv = bin; AES_ENCRYPT (b, rounds); - VEC_STORE_BE 
(out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); out++; in++; } - VEC_STORE_BE (iv_arg, iv, bige_const); + VEC_STORE_BE (iv_arg, 0, iv, bige_const); } @@ -646,41 +769,41 @@ void _gcry_aes_ppc8_cbc_enc (void *context, unsigned char *iv_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *in = (const u128_t *)inbuf_arg; u128_t *out = (u128_t *)outbuf_arg; int rounds = ctx->rounds; - ROUND_KEY_VARIABLES; + ROUND_KEY_VARIABLES_ALL; block lastiv, b; + unsigned int outadd = !cbc_mac; - lastiv = VEC_LOAD_BE (iv_arg, bige_const); + lastiv = VEC_LOAD_BE (iv_arg, 0, bige_const); - PRELOAD_ROUND_KEYS (rounds); + PRELOAD_ROUND_KEYS_ALL (rounds); for (; nblocks; nblocks--) { - b = lastiv ^ VEC_LOAD_BE (in, bige_const); + b = lastiv ^ VEC_LOAD_BE (in, 0, bige_const); - AES_ENCRYPT (b, rounds); + AES_ENCRYPT_ALL (b, rounds); lastiv = b; - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); in++; - if (!cbc_mac) - out++; + out += outadd; } - VEC_STORE_BE (iv_arg, lastiv, bige_const); + VEC_STORE_BE (iv_arg, 0, lastiv, bige_const); } void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschdec; const u128_t *in = (const u128_t *)inbuf_arg; @@ -699,41 +822,49 @@ void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg, ctx->decryption_prepared = 1; } - iv = VEC_LOAD_BE (iv_arg, bige_const); + iv = VEC_LOAD_BE (iv_arg, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); rkeylast_orig = rkeylast; for (; nblocks >= 8; nblocks -= 8) { - in0 = VEC_LOAD_BE (in + 0, bige_const); - in1 = VEC_LOAD_BE (in + 1, 
bige_const); - in2 = VEC_LOAD_BE (in + 2, bige_const); - in3 = VEC_LOAD_BE (in + 3, bige_const); - in4 = VEC_LOAD_BE (in + 4, bige_const); - in5 = VEC_LOAD_BE (in + 5, bige_const); - in6 = VEC_LOAD_BE (in + 6, bige_const); - in7 = VEC_LOAD_BE (in + 7, bige_const); - - b0 = rkey0 ^ in0; - b1 = rkey0 ^ in1; - b2 = rkey0 ^ in2; - b3 = rkey0 ^ in3; - b4 = rkey0 ^ in4; - b5 = rkey0 ^ in5; - b6 = rkey0 ^ in6; - b7 = rkey0 ^ in7; + in0 = VEC_LOAD_BE_NOSWAP (in, 0); + in1 = VEC_LOAD_BE_NOSWAP (in, 1); + in2 = VEC_LOAD_BE_NOSWAP (in, 2); + in3 = VEC_LOAD_BE_NOSWAP (in, 3); + in0 = VEC_BE_SWAP (in0, bige_const); + in1 = VEC_BE_SWAP (in1, bige_const); + in4 = VEC_LOAD_BE_NOSWAP (in, 4); + in5 = VEC_LOAD_BE_NOSWAP (in, 5); + in2 = VEC_BE_SWAP (in2, bige_const); + in3 = VEC_BE_SWAP (in3, bige_const); + in6 = VEC_LOAD_BE_NOSWAP (in, 6); + in7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + in4 = VEC_BE_SWAP (in4, bige_const); + in5 = VEC_BE_SWAP (in5, bige_const); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); + in6 = VEC_BE_SWAP (in6, bige_const); + in7 = VEC_BE_SWAP (in7, bige_const); + b4 = asm_xor (rkey0, in4); + b5 = asm_xor (rkey0, in5); + b6 = asm_xor (rkey0, in6); + b7 = asm_xor (rkey0, in7); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD(&rk[r]); \ - b0 = vec_ncipher_be (b0, rkey); \ - b1 = vec_ncipher_be (b1, rkey); \ - b2 = vec_ncipher_be (b2, rkey); \ - b3 = vec_ncipher_be (b3, rkey); \ - b4 = vec_ncipher_be (b4, rkey); \ - b5 = vec_ncipher_be (b5, rkey); \ - b6 = vec_ncipher_be (b6, rkey); \ - b7 = vec_ncipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); \ + b4 = asm_ncipher_be (b4, rkey); \ + b5 = asm_ncipher_be (b5, rkey); \ + b6 = asm_ncipher_be (b6, rkey); \ + b7 = asm_ncipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -757,48 +888,60 @@ void 
_gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_ncipherlast_be (b0, rkey ^ iv); - b1 = vec_ncipherlast_be (b1, rkey ^ in0); - b2 = vec_ncipherlast_be (b2, rkey ^ in1); - b3 = vec_ncipherlast_be (b3, rkey ^ in2); - b4 = vec_ncipherlast_be (b4, rkey ^ in3); - b5 = vec_ncipherlast_be (b5, rkey ^ in4); - b6 = vec_ncipherlast_be (b6, rkey ^ in5); - b7 = vec_ncipherlast_be (b7, rkey ^ in6); + iv = asm_xor (rkeylast, iv); + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + b0 = asm_ncipherlast_be (b0, iv); iv = in7; - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); - - in += 8; + b1 = asm_ncipherlast_be (b1, in0); + in3 = asm_xor (rkeylast, in3); + in4 = asm_xor (rkeylast, in4); + b2 = asm_ncipherlast_be (b2, in1); + b3 = asm_ncipherlast_be (b3, in2); + in5 = asm_xor (rkeylast, in5); + in6 = asm_xor (rkeylast, in6); + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_ncipherlast_be (b4, in3); + b5 = asm_ncipherlast_be (b5, in4); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_ncipherlast_be (b6, in5); + b7 = asm_ncipherlast_be (b7, in6); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { - in0 = 
VEC_LOAD_BE (in + 0, bige_const); - in1 = VEC_LOAD_BE (in + 1, bige_const); - in2 = VEC_LOAD_BE (in + 2, bige_const); - in3 = VEC_LOAD_BE (in + 3, bige_const); + in0 = VEC_LOAD_BE (in, 0, bige_const); + in1 = VEC_LOAD_BE (in, 1, bige_const); + in2 = VEC_LOAD_BE (in, 2, bige_const); + in3 = VEC_LOAD_BE (in, 3, bige_const); - b0 = rkey0 ^ in0; - b1 = rkey0 ^ in1; - b2 = rkey0 ^ in2; - b3 = rkey0 ^ in3; + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD(&rk[r]); \ - b0 = vec_ncipher_be (b0, rkey); \ - b1 = vec_ncipher_be (b1, rkey); \ - b2 = vec_ncipher_be (b2, rkey); \ - b3 = vec_ncipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -822,17 +965,21 @@ void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_ncipherlast_be (b0, rkey ^ iv); - b1 = vec_ncipherlast_be (b1, rkey ^ in0); - b2 = vec_ncipherlast_be (b2, rkey ^ in1); - b3 = vec_ncipherlast_be (b3, rkey ^ in2); + iv = asm_xor (rkeylast, iv); + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + + b0 = asm_ncipherlast_be (b0, iv); iv = in3; + b1 = asm_ncipherlast_be (b1, in0); + b2 = asm_ncipherlast_be (b2, in1); + b3 = asm_ncipherlast_be (b3, in2); - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; @@ -843,17 +990,17 @@ void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg, { rkeylast = rkeylast_orig ^ iv; - iv = VEC_LOAD_BE 
(in, bige_const); + iv = VEC_LOAD_BE (in, 0, bige_const); b = iv; AES_DECRYPT (b, rounds); - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); in++; out++; } - VEC_STORE_BE (iv_arg, iv, bige_const); + VEC_STORE_BE (iv_arg, 0, iv, bige_const); } @@ -863,7 +1010,7 @@ void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, { static const unsigned char vec_one_const[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *in = (const u128_t *)inbuf_arg; @@ -873,56 +1020,80 @@ void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, block rkeylast_orig; block ctr, b, one; - ctr = VEC_LOAD_BE (ctr_arg, bige_const); - one = VEC_LOAD_BE (&vec_one_const, bige_const); + ctr = VEC_LOAD_BE (ctr_arg, 0, bige_const); + one = VEC_LOAD_BE (&vec_one_const, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); rkeylast_orig = rkeylast; if (nblocks >= 4) { + block in0, in1, in2, in3, in4, in5, in6, in7; block b0, b1, b2, b3, b4, b5, b6, b7; block two, three, four; - block ctr4; block rkey; - two = vec_add_uint128 (one, one); - three = vec_add_uint128 (two, one); - four = vec_add_uint128 (two, two); + two = asm_add_uint128 (one, one); + three = asm_add_uint128 (two, one); + four = asm_add_uint128 (two, two); for (; nblocks >= 8; nblocks -= 8) { - ctr4 = vec_add_uint128 (ctr, four); - b0 = rkey0 ^ ctr; - b1 = rkey0 ^ vec_add_uint128 (ctr, one); - b2 = rkey0 ^ vec_add_uint128 (ctr, two); - b3 = rkey0 ^ vec_add_uint128 (ctr, three); - b4 = rkey0 ^ ctr4; - b5 = rkey0 ^ vec_add_uint128 (ctr4, one); - b6 = rkey0 ^ vec_add_uint128 (ctr4, two); - b7 = rkey0 ^ vec_add_uint128 (ctr4, three); - ctr = vec_add_uint128 (ctr4, four); + b1 = asm_add_uint128 (ctr, one); + b2 = asm_add_uint128 (ctr, two); + b3 = asm_add_uint128 (ctr, three); + b4 = asm_add_uint128 (ctr, four); 
+ b5 = asm_add_uint128 (b1, four); + b6 = asm_add_uint128 (b2, four); + b7 = asm_add_uint128 (b3, four); + b0 = asm_xor (rkey0, ctr); + rkey = ALIGNED_LOAD (rk, 1); + ctr = asm_add_uint128 (b4, four); + b1 = asm_xor (rkey0, b1); + b2 = asm_xor (rkey0, b2); + b3 = asm_xor (rkey0, b3); + b0 = asm_cipher_be (b0, rkey); + b1 = asm_cipher_be (b1, rkey); + b2 = asm_cipher_be (b2, rkey); + b3 = asm_cipher_be (b3, rkey); + b4 = asm_xor (rkey0, b4); + b5 = asm_xor (rkey0, b5); + b6 = asm_xor (rkey0, b6); + b7 = asm_xor (rkey0, b7); + b4 = asm_cipher_be (b4, rkey); + b5 = asm_cipher_be (b5, rkey); + b6 = asm_cipher_be (b6, rkey); + b7 = asm_cipher_be (b7, rkey); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD(&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); \ - b4 = vec_cipher_be (b4, rkey); \ - b5 = vec_cipher_be (b5, rkey); \ - b6 = vec_cipher_be (b6, rkey); \ - b7 = vec_cipher_be (b7, rkey); - - DO_ROUND(1); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); + + in0 = VEC_LOAD_BE_NOSWAP (in, 0); DO_ROUND(2); + in1 = VEC_LOAD_BE_NOSWAP (in, 1); DO_ROUND(3); + in2 = VEC_LOAD_BE_NOSWAP (in, 2); DO_ROUND(4); + in3 = VEC_LOAD_BE_NOSWAP (in, 3); DO_ROUND(5); + in4 = VEC_LOAD_BE_NOSWAP (in, 4); DO_ROUND(6); + in5 = VEC_LOAD_BE_NOSWAP (in, 5); DO_ROUND(7); + in6 = VEC_LOAD_BE_NOSWAP (in, 6); DO_ROUND(8); + in7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; DO_ROUND(9); + if (rounds >= 12) { DO_ROUND(10); @@ -936,43 +1107,68 @@ void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey ^ VEC_LOAD_BE (in + 0, bige_const)); - b1 = vec_cipherlast_be (b1, rkey ^ 
VEC_LOAD_BE (in + 1, bige_const)); - b2 = vec_cipherlast_be (b2, rkey ^ VEC_LOAD_BE (in + 2, bige_const)); - b3 = vec_cipherlast_be (b3, rkey ^ VEC_LOAD_BE (in + 3, bige_const)); - b4 = vec_cipherlast_be (b4, rkey ^ VEC_LOAD_BE (in + 4, bige_const)); - b5 = vec_cipherlast_be (b5, rkey ^ VEC_LOAD_BE (in + 5, bige_const)); - b6 = vec_cipherlast_be (b6, rkey ^ VEC_LOAD_BE (in + 6, bige_const)); - b7 = vec_cipherlast_be (b7, rkey ^ VEC_LOAD_BE (in + 7, bige_const)); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); - - in += 8; + in0 = VEC_BE_SWAP (in0, bige_const); + in1 = VEC_BE_SWAP (in1, bige_const); + in2 = VEC_BE_SWAP (in2, bige_const); + in3 = VEC_BE_SWAP (in3, bige_const); + in4 = VEC_BE_SWAP (in4, bige_const); + in5 = VEC_BE_SWAP (in5, bige_const); + in6 = VEC_BE_SWAP (in6, bige_const); + in7 = VEC_BE_SWAP (in7, bige_const); + + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + b0 = asm_cipherlast_be (b0, in0); + b1 = asm_cipherlast_be (b1, in1); + in4 = asm_xor (rkeylast, in4); + in5 = asm_xor (rkeylast, in5); + b2 = asm_cipherlast_be (b2, in2); + b3 = asm_cipherlast_be (b3, in3); + in6 = asm_xor (rkeylast, in6); + in7 = asm_xor (rkeylast, in7); + b4 = asm_cipherlast_be (b4, in4); + b5 = asm_cipherlast_be (b5, in5); + b6 = asm_cipherlast_be (b6, in6); + b7 = asm_cipherlast_be (b7, in7); + + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + 
VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { - b0 = rkey0 ^ ctr; - b1 = rkey0 ^ vec_add_uint128 (ctr, one); - b2 = rkey0 ^ vec_add_uint128 (ctr, two); - b3 = rkey0 ^ vec_add_uint128 (ctr, three); - ctr = vec_add_uint128 (ctr, four); + b1 = asm_add_uint128 (ctr, one); + b2 = asm_add_uint128 (ctr, two); + b3 = asm_add_uint128 (ctr, three); + b0 = asm_xor (rkey0, ctr); + ctr = asm_add_uint128 (ctr, four); + b1 = asm_xor (rkey0, b1); + b2 = asm_xor (rkey0, b2); + b3 = asm_xor (rkey0, b3); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD(&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -982,6 +1178,12 @@ void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, DO_ROUND(6); DO_ROUND(7); DO_ROUND(8); + + in0 = VEC_LOAD_BE (in, 0, bige_const); + in1 = VEC_LOAD_BE (in, 1, bige_const); + in2 = VEC_LOAD_BE (in, 2, bige_const); + in3 = VEC_LOAD_BE (in, 3, bige_const); + DO_ROUND(9); if (rounds >= 12) { @@ -996,16 +1198,21 @@ void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey ^ VEC_LOAD_BE (in + 0, bige_const)); - b1 = vec_cipherlast_be (b1, rkey ^ VEC_LOAD_BE (in + 1, bige_const)); - b2 = vec_cipherlast_be (b2, rkey ^ VEC_LOAD_BE (in + 2, bige_const)); - b3 = vec_cipherlast_be (b3, rkey ^ VEC_LOAD_BE (in + 3, bige_const)); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE 
(out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + + b0 = asm_cipherlast_be (b0, in0); + b1 = asm_cipherlast_be (b1, in1); + b2 = asm_cipherlast_be (b2, in2); + b3 = asm_cipherlast_be (b3, in3); + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + in += 4; out += 4; nblocks -= 4; @@ -1015,18 +1222,18 @@ void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, for (; nblocks; nblocks--) { b = ctr; - ctr = vec_add_uint128 (ctr, one); - rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, bige_const); + ctr = asm_add_uint128 (ctr, one); + rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const); AES_ENCRYPT (b, rounds); - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); out++; in++; } - VEC_STORE_BE (ctr_arg, ctr, bige_const); + VEC_STORE_BE (ctr_arg, 0, ctr, bige_const); } @@ -1034,7 +1241,7 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = (void *)&c->context.c; const u128_t *in = (const u128_t *)inbuf_arg; u128_t *out = (u128_t *)outbuf_arg; @@ -1043,16 +1250,16 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, block l0, l1, l2, l; block b0, b1, b2, b3, b4, b5, b6, b7, b; block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7; - block rkey; + block rkey, rkeylf; block ctr, iv; ROUND_KEY_VARIABLES; - iv = VEC_LOAD_BE (c->u_iv.iv, bige_const); - ctr = VEC_LOAD_BE (c->u_ctr.ctr, bige_const); + iv = VEC_LOAD_BE (c->u_iv.iv, 0, bige_const); + ctr = VEC_LOAD_BE (c->u_ctr.ctr, 0, bige_const); - l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], bige_const); - l1 = VEC_LOAD_BE 
(c->u_mode.ocb.L[1], bige_const); - l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], bige_const); + l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const); + l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const); + l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const); if (encrypt) { @@ -1062,8 +1269,8 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, for (; nblocks >= 8 && data_nblocks % 8; nblocks--) { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); - b = VEC_LOAD_BE (in, bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; @@ -1074,7 +1281,7 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, AES_ENCRYPT (b, rounds); b ^= iv; - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); in += 1; out += 1; @@ -1082,16 +1289,25 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, for (; nblocks >= 8; nblocks -= 8) { - b0 = VEC_LOAD_BE (in + 0, bige_const); - b1 = VEC_LOAD_BE (in + 1, bige_const); - b2 = VEC_LOAD_BE (in + 2, bige_const); - b3 = VEC_LOAD_BE (in + 3, bige_const); - b4 = VEC_LOAD_BE (in + 4, bige_const); - b5 = VEC_LOAD_BE (in + 5, bige_const); - b6 = VEC_LOAD_BE (in + 6, bige_const); - b7 = VEC_LOAD_BE (in + 7, bige_const); - - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), bige_const); + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0); + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + b2 = VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + 
b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + l = VEC_BE_SWAP(l, bige_const); ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; @@ -1117,15 +1333,15 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, iv = iv7 ^ rkey0; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); \ - b4 = vec_cipher_be (b4, rkey); \ - b5 = vec_cipher_be (b5, rkey); \ - b6 = vec_cipher_be (b6, rkey); \ - b7 = vec_cipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1134,7 +1350,20 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + DO_ROUND(8); + + iv0 = asm_xor (rkeylf, iv0); + iv1 = asm_xor (rkeylf, iv1); + iv2 = asm_xor (rkeylf, iv2); + iv3 = asm_xor (rkeylf, iv3); + iv4 = asm_xor (rkeylf, iv4); + iv5 = asm_xor (rkeylf, iv5); + iv6 = asm_xor (rkeylf, iv6); + iv7 = asm_xor (rkeylf, iv7); + DO_ROUND(9); if (rounds >= 12) { @@ -1149,37 +1378,42 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, #undef DO_ROUND - rkey = rkeylast ^ rkey0; - b0 = vec_cipherlast_be (b0, rkey ^ iv0); - b1 = vec_cipherlast_be (b1, rkey ^ iv1); - b2 = vec_cipherlast_be (b2, rkey ^ iv2); - b3 = vec_cipherlast_be (b3, rkey ^ iv3); - b4 = vec_cipherlast_be (b4, rkey ^ iv4); - b5 = vec_cipherlast_be (b5, rkey ^ iv5); - b6 = vec_cipherlast_be (b6, rkey ^ iv6); - b7 = vec_cipherlast_be (b7, rkey ^ iv7); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, 
b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); - - in += 8; + b0 = asm_cipherlast_be (b0, iv0); + b1 = asm_cipherlast_be (b1, iv1); + b2 = asm_cipherlast_be (b2, iv2); + b3 = asm_cipherlast_be (b3, iv3); + b4 = asm_cipherlast_be (b4, iv4); + b5 = asm_cipherlast_be (b5, iv5); + b6 = asm_cipherlast_be (b6, iv6); + b7 = asm_cipherlast_be (b7, iv7); + + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4 && (data_nblocks % 4) == 0) { - b0 = VEC_LOAD_BE (in + 0, bige_const); - b1 = VEC_LOAD_BE (in + 1, bige_const); - b2 = VEC_LOAD_BE (in + 2, bige_const); - b3 = VEC_LOAD_BE (in + 3, bige_const); + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); ctr ^= b0 ^ b1 ^ b2 ^ b3; @@ -1197,11 +1431,11 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, iv = iv3 ^ rkey0; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + 
b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1226,15 +1460,15 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, #undef DO_ROUND rkey = rkeylast ^ rkey0; - b0 = vec_cipherlast_be (b0, rkey ^ iv0); - b1 = vec_cipherlast_be (b1, rkey ^ iv1); - b2 = vec_cipherlast_be (b2, rkey ^ iv2); - b3 = vec_cipherlast_be (b3, rkey ^ iv3); + b0 = asm_cipherlast_be (b0, rkey ^ iv0); + b1 = asm_cipherlast_be (b1, rkey ^ iv1); + b2 = asm_cipherlast_be (b2, rkey ^ iv2); + b3 = asm_cipherlast_be (b3, rkey ^ iv3); - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; @@ -1243,8 +1477,8 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, for (; nblocks; nblocks--) { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); - b = VEC_LOAD_BE (in, bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; @@ -1255,7 +1489,7 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, AES_ENCRYPT (b, rounds); b ^= iv; - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); in += 1; out += 1; @@ -1275,8 +1509,8 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, for (; nblocks >= 8 && data_nblocks % 8; nblocks--) { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); - b = VEC_LOAD_BE (in, bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= 
l; @@ -1287,7 +1521,7 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, /* Checksum_i = Checksum_{i-1} xor P_i */ ctr ^= b; - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); in += 1; out += 1; @@ -1295,16 +1529,25 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, for (; nblocks >= 8; nblocks -= 8) { - b0 = VEC_LOAD_BE (in + 0, bige_const); - b1 = VEC_LOAD_BE (in + 1, bige_const); - b2 = VEC_LOAD_BE (in + 2, bige_const); - b3 = VEC_LOAD_BE (in + 3, bige_const); - b4 = VEC_LOAD_BE (in + 4, bige_const); - b5 = VEC_LOAD_BE (in + 5, bige_const); - b6 = VEC_LOAD_BE (in + 6, bige_const); - b7 = VEC_LOAD_BE (in + 7, bige_const); - - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), bige_const); + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0); + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + b2 = VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + l = VEC_BE_SWAP(l, bige_const); iv ^= rkey0; @@ -1328,15 +1571,15 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, iv = iv7 ^ rkey0; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_ncipher_be (b0, rkey); \ - b1 = vec_ncipher_be (b1, rkey); \ - b2 = vec_ncipher_be (b2, rkey); \ - b3 = vec_ncipher_be (b3, rkey); \ - b4 = vec_ncipher_be (b4, rkey); \ - b5 = vec_ncipher_be (b5, rkey); \ - b6 = vec_ncipher_be (b6, rkey); \ - b7 = vec_ncipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = 
asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); \ + b4 = asm_ncipher_be (b4, rkey); \ + b5 = asm_ncipher_be (b5, rkey); \ + b6 = asm_ncipher_be (b6, rkey); \ + b7 = asm_ncipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1345,7 +1588,20 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + DO_ROUND(8); + + iv0 = asm_xor (rkeylf, iv0); + iv1 = asm_xor (rkeylf, iv1); + iv2 = asm_xor (rkeylf, iv2); + iv3 = asm_xor (rkeylf, iv3); + iv4 = asm_xor (rkeylf, iv4); + iv5 = asm_xor (rkeylf, iv5); + iv6 = asm_xor (rkeylf, iv6); + iv7 = asm_xor (rkeylf, iv7); + DO_ROUND(9); if (rounds >= 12) { @@ -1360,39 +1616,44 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, #undef DO_ROUND - rkey = rkeylast ^ rkey0; - b0 = vec_ncipherlast_be (b0, rkey ^ iv0); - b1 = vec_ncipherlast_be (b1, rkey ^ iv1); - b2 = vec_ncipherlast_be (b2, rkey ^ iv2); - b3 = vec_ncipherlast_be (b3, rkey ^ iv3); - b4 = vec_ncipherlast_be (b4, rkey ^ iv4); - b5 = vec_ncipherlast_be (b5, rkey ^ iv5); - b6 = vec_ncipherlast_be (b6, rkey ^ iv6); - b7 = vec_ncipherlast_be (b7, rkey ^ iv7); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); + b0 = asm_ncipherlast_be (b0, iv0); + b1 = asm_ncipherlast_be (b1, iv1); + b2 = asm_ncipherlast_be (b2, iv2); + b3 = asm_ncipherlast_be (b3, iv3); + b4 = asm_ncipherlast_be (b4, iv4); + b5 = asm_ncipherlast_be (b5, iv5); + b6 = asm_ncipherlast_be (b6, iv6); + b7 = asm_ncipherlast_be (b7, iv7); ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; - in += 8; + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + 
b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4 && (data_nblocks % 4) == 0) { - b0 = VEC_LOAD_BE (in + 0, bige_const); - b1 = VEC_LOAD_BE (in + 1, bige_const); - b2 = VEC_LOAD_BE (in + 2, bige_const); - b3 = VEC_LOAD_BE (in + 3, bige_const); + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); iv ^= rkey0; @@ -1408,11 +1669,11 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, iv = iv3 ^ rkey0; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_ncipher_be (b0, rkey); \ - b1 = vec_ncipher_be (b1, rkey); \ - b2 = vec_ncipher_be (b2, rkey); \ - b3 = vec_ncipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1437,15 +1698,15 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, #undef DO_ROUND rkey = rkeylast ^ rkey0; - b0 = vec_ncipherlast_be (b0, rkey ^ iv0); - b1 = vec_ncipherlast_be (b1, rkey ^ iv1); - b2 = vec_ncipherlast_be (b2, rkey ^ iv2); - b3 = vec_ncipherlast_be (b3, rkey ^ iv3); + b0 = asm_ncipherlast_be (b0, rkey ^ iv0); + b1 = asm_ncipherlast_be (b1, rkey ^ iv1); + b2 = asm_ncipherlast_be (b2, rkey ^ iv2); + b3 
= asm_ncipherlast_be (b3, rkey ^ iv3); - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); ctr ^= b0 ^ b1 ^ b2 ^ b3; @@ -1456,8 +1717,8 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, for (; nblocks; nblocks--) { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); - b = VEC_LOAD_BE (in, bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; @@ -1468,15 +1729,15 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, /* Checksum_i = Checksum_{i-1} xor P_i */ ctr ^= b; - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); in += 1; out += 1; } } - VEC_STORE_BE (c->u_iv.iv, iv, bige_const); - VEC_STORE_BE (c->u_ctr.ctr, ctr, bige_const); + VEC_STORE_BE (c->u_iv.iv, 0, iv, bige_const); + VEC_STORE_BE (c->u_ctr.ctr, 0, ctr, bige_const); c->u_mode.ocb.data_nblocks = data_nblocks; return 0; @@ -1485,7 +1746,7 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = (void *)&c->context.c; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *abuf = (const u128_t *)abuf_arg; @@ -1498,19 +1759,19 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, block ctr, iv; ROUND_KEY_VARIABLES; - iv = VEC_LOAD_BE (c->u_mode.ocb.aad_offset, bige_const); - ctr = VEC_LOAD_BE (c->u_mode.ocb.aad_sum, bige_const); + iv = VEC_LOAD_BE (c->u_mode.ocb.aad_offset, 0, bige_const); + ctr = 
VEC_LOAD_BE (c->u_mode.ocb.aad_sum, 0, bige_const); - l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], bige_const); - l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], bige_const); - l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], bige_const); + l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const); + l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const); + l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const); PRELOAD_ROUND_KEYS (rounds); for (; nblocks >= 8 && data_nblocks % 8; nblocks--) { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); - b = VEC_LOAD_BE (abuf, bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (abuf, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; @@ -1524,16 +1785,16 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, for (; nblocks >= 8; nblocks -= 8) { - b0 = VEC_LOAD_BE (abuf + 0, bige_const); - b1 = VEC_LOAD_BE (abuf + 1, bige_const); - b2 = VEC_LOAD_BE (abuf + 2, bige_const); - b3 = VEC_LOAD_BE (abuf + 3, bige_const); - b4 = VEC_LOAD_BE (abuf + 4, bige_const); - b5 = VEC_LOAD_BE (abuf + 5, bige_const); - b6 = VEC_LOAD_BE (abuf + 6, bige_const); - b7 = VEC_LOAD_BE (abuf + 7, bige_const); + b0 = VEC_LOAD_BE (abuf, 0, bige_const); + b1 = VEC_LOAD_BE (abuf, 1, bige_const); + b2 = VEC_LOAD_BE (abuf, 2, bige_const); + b3 = VEC_LOAD_BE (abuf, 3, bige_const); + b4 = VEC_LOAD_BE (abuf, 4, bige_const); + b5 = VEC_LOAD_BE (abuf, 5, bige_const); + b6 = VEC_LOAD_BE (abuf, 6, bige_const); + b7 = VEC_LOAD_BE (abuf, 7, bige_const); - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), 0, bige_const); frkey = rkey0; iv ^= frkey; @@ -1558,15 +1819,15 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, iv = iv7 ^ frkey; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); 
\ - b4 = vec_cipher_be (b4, rkey); \ - b5 = vec_cipher_be (b5, rkey); \ - b6 = vec_cipher_be (b6, rkey); \ - b7 = vec_cipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1591,14 +1852,14 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, #undef DO_ROUND rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey); - b1 = vec_cipherlast_be (b1, rkey); - b2 = vec_cipherlast_be (b2, rkey); - b3 = vec_cipherlast_be (b3, rkey); - b4 = vec_cipherlast_be (b4, rkey); - b5 = vec_cipherlast_be (b5, rkey); - b6 = vec_cipherlast_be (b6, rkey); - b7 = vec_cipherlast_be (b7, rkey); + b0 = asm_cipherlast_be (b0, rkey); + b1 = asm_cipherlast_be (b1, rkey); + b2 = asm_cipherlast_be (b2, rkey); + b3 = asm_cipherlast_be (b3, rkey); + b4 = asm_cipherlast_be (b4, rkey); + b5 = asm_cipherlast_be (b5, rkey); + b6 = asm_cipherlast_be (b6, rkey); + b7 = asm_cipherlast_be (b7, rkey); ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; @@ -1607,12 +1868,12 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, if (nblocks >= 4 && (data_nblocks % 4) == 0) { - b0 = VEC_LOAD_BE (abuf + 0, bige_const); - b1 = VEC_LOAD_BE (abuf + 1, bige_const); - b2 = VEC_LOAD_BE (abuf + 2, bige_const); - b3 = VEC_LOAD_BE (abuf + 3, bige_const); + b0 = VEC_LOAD_BE (abuf, 0, bige_const); + b1 = VEC_LOAD_BE (abuf, 1, bige_const); + b2 = VEC_LOAD_BE (abuf, 2, bige_const); + b3 = VEC_LOAD_BE (abuf, 3, bige_const); - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); frkey = rkey0; iv ^= frkey; @@ -1629,11 +1890,11 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, iv = iv3 ^ frkey; #define 
DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1658,10 +1919,10 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, #undef DO_ROUND rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey); - b1 = vec_cipherlast_be (b1, rkey); - b2 = vec_cipherlast_be (b2, rkey); - b3 = vec_cipherlast_be (b3, rkey); + b0 = asm_cipherlast_be (b0, rkey); + b1 = asm_cipherlast_be (b1, rkey); + b2 = asm_cipherlast_be (b2, rkey); + b3 = asm_cipherlast_be (b3, rkey); ctr ^= b0 ^ b1 ^ b2 ^ b3; @@ -1671,8 +1932,8 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, for (; nblocks; nblocks--) { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); - b = VEC_LOAD_BE (abuf, bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (abuf, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; @@ -1684,8 +1945,8 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, abuf += 1; } - VEC_STORE_BE (c->u_mode.ocb.aad_offset, iv, bige_const); - VEC_STORE_BE (c->u_mode.ocb.aad_sum, ctr, bige_const); + VEC_STORE_BE (c->u_mode.ocb.aad_offset, 0, iv, bige_const); + VEC_STORE_BE (c->u_mode.ocb.aad_sum, 0, ctr, bige_const); c->u_mode.ocb.aad_nblocks = data_nblocks; return 0; @@ -1696,44 +1957,59 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { +#ifdef WORDS_BIGENDIAN static const block vec_bswap64_const = - { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }; + { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 }; static const block vec_bswap128_const = { 
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; +#else + static const block vec_bswap64_const = + { ~8, ~9, ~10, ~11, ~12, ~13, ~14, ~15, ~0, ~1, ~2, ~3, ~4, ~5, ~6, ~7 }; + static const block vec_bswap128_const = + { ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8, ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0 }; + static const block vec_tweakin_swap_const = + { ~12, ~13, ~14, ~15, ~8, ~9, ~10, ~11, ~4, ~5, ~6, ~7, ~0, ~1, ~2, ~3 }; +#endif static const unsigned char vec_tweak_const[16] = { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0x87 }; static const vector unsigned long long vec_shift63_const = { 63, 63 }; static const vector unsigned long long vec_shift1_const = { 1, 1 }; - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *in = (const u128_t *)inbuf_arg; u128_t *out = (u128_t *)outbuf_arg; int rounds = ctx->rounds; - block tweak_tmp, tweak_next, tweak; - block b0, b1, b2, b3, b4, b5, b6, b7, b, rkey; + block tweak; + block b0, b1, b2, b3, b4, b5, b6, b7, b, rkey, rkeylf; block tweak0, tweak1, tweak2, tweak3, tweak4, tweak5, tweak6, tweak7; block tweak_const, bswap64_const, bswap128_const; vector unsigned long long shift63_const, shift1_const; ROUND_KEY_VARIABLES; - tweak_const = VEC_LOAD_BE (&vec_tweak_const, bige_const); - bswap64_const = ALIGNED_LOAD (&vec_bswap64_const); - bswap128_const = ALIGNED_LOAD (&vec_bswap128_const); - shift63_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift63_const); - shift1_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift1_const); + tweak_const = VEC_LOAD_BE (&vec_tweak_const, 0, bige_const); + bswap64_const = ALIGNED_LOAD (&vec_bswap64_const, 0); + bswap128_const = ALIGNED_LOAD (&vec_bswap128_const, 0); + shift63_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift63_const, 0); + shift1_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift1_const, 0); - tweak_next = VEC_LOAD_BE (tweak_arg, bige_const); +#ifdef 
WORDS_BIGENDIAN + tweak = VEC_LOAD_BE (tweak_arg, 0, bige_const); + tweak = asm_vperm1 (tweak, bswap128_const); +#else + tweak = VEC_LOAD_BE (tweak_arg, 0, vec_tweakin_swap_const); +#endif -#define GEN_TWEAK(tweak, tmp) /* Generate next tweak. */ \ - tmp = vec_vperm(tweak, tweak, bswap64_const); \ - tweak = vec_vperm(tweak, tweak, bswap128_const); \ - tmp = (block)(vec_sra((vector unsigned long long)tmp, shift63_const)) & \ - tweak_const; \ - tweak = (block)vec_sl((vector unsigned long long)tweak, shift1_const); \ - tweak = tweak ^ tmp; \ - tweak = vec_vperm(tweak, tweak, bswap128_const); +#define GEN_TWEAK(tout, tin) /* Generate next tweak. */ \ + do { \ + block tmp1, tmp2; \ + tmp1 = asm_vperm1((tin), bswap64_const); \ + tmp2 = (block)vec_sl((vector unsigned long long)(tin), shift1_const); \ + tmp1 = (block)(vec_sra((vector unsigned long long)tmp1, shift63_const)) & \ + tweak_const; \ + tout = asm_xor(tmp1, tmp2); \ + } while (0) if (encrypt) { @@ -1743,42 +2019,70 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, for (; nblocks >= 8; nblocks -= 8) { - tweak0 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak1 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak2 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak3 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak4 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak5 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak6 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak7 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - - b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0; - b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0; - b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0; - b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0; - b4 = VEC_LOAD_BE (in + 4, bige_const) ^ tweak4 ^ rkey0; - b5 = VEC_LOAD_BE (in + 5, bige_const) ^ tweak5 ^ rkey0; - b6 = VEC_LOAD_BE (in + 6, bige_const) ^ tweak6 ^ rkey0; - b7 = 
VEC_LOAD_BE (in + 7, bige_const) ^ tweak7 ^ rkey0; + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + tweak0 = asm_vperm1 (tweak0, bswap128_const); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + GEN_TWEAK (tweak2, tweak1); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + GEN_TWEAK (tweak3, tweak2); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + GEN_TWEAK (tweak4, tweak3); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + b2 = VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + GEN_TWEAK (tweak5, tweak4); + tweak4 = asm_vperm1 (tweak4, bswap128_const); + GEN_TWEAK (tweak6, tweak5); + tweak5 = asm_vperm1 (tweak5, bswap128_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + GEN_TWEAK (tweak7, tweak6); + tweak6 = asm_vperm1 (tweak6, bswap128_const); + GEN_TWEAK (tweak, tweak7); + tweak7 = asm_vperm1 (tweak7, bswap128_const); + b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + + tweak0 = asm_xor (tweak0, rkey0); + tweak1 = asm_xor (tweak1, rkey0); + tweak2 = asm_xor (tweak2, rkey0); + tweak3 = asm_xor (tweak3, rkey0); + tweak4 = asm_xor (tweak4, rkey0); + tweak5 = asm_xor (tweak5, rkey0); + tweak6 = asm_xor (tweak6, rkey0); + tweak7 = asm_xor (tweak7, rkey0); + + b0 = asm_xor (b0, tweak0); + b1 = asm_xor (b1, tweak1); + b2 = asm_xor (b2, tweak2); + b3 = asm_xor (b3, tweak3); + b4 = asm_xor (b4, tweak4); + b5 = asm_xor (b5, tweak5); + b6 = asm_xor (b6, tweak6); + b7 = asm_xor (b7, tweak7); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); \ - b4 = 
vec_cipher_be (b4, rkey); \ - b5 = vec_cipher_be (b5, rkey); \ - b6 = vec_cipher_be (b6, rkey); \ - b7 = vec_cipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1787,7 +2091,20 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + DO_ROUND(8); + + tweak0 = asm_xor (tweak0, rkeylf); + tweak1 = asm_xor (tweak1, rkeylf); + tweak2 = asm_xor (tweak2, rkeylf); + tweak3 = asm_xor (tweak3, rkeylf); + tweak4 = asm_xor (tweak4, rkeylf); + tweak5 = asm_xor (tweak5, rkeylf); + tweak6 = asm_xor (tweak6, rkeylf); + tweak7 = asm_xor (tweak7, rkeylf); + DO_ROUND(9); if (rounds >= 12) { @@ -1802,51 +2119,62 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey ^ tweak0); - b1 = vec_cipherlast_be (b1, rkey ^ tweak1); - b2 = vec_cipherlast_be (b2, rkey ^ tweak2); - b3 = vec_cipherlast_be (b3, rkey ^ tweak3); - b4 = vec_cipherlast_be (b4, rkey ^ tweak4); - b5 = vec_cipherlast_be (b5, rkey ^ tweak5); - b6 = vec_cipherlast_be (b6, rkey ^ tweak6); - b7 = vec_cipherlast_be (b7, rkey ^ tweak7); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); - - in += 8; + b0 = asm_cipherlast_be (b0, tweak0); + b1 = asm_cipherlast_be (b1, tweak1); + b2 = asm_cipherlast_be (b2, tweak2); + b3 = asm_cipherlast_be (b3, tweak3); + 
b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_cipherlast_be (b4, tweak4); + b5 = asm_cipherlast_be (b5, tweak5); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_cipherlast_be (b6, tweak6); + b7 = asm_cipherlast_be (b7, tweak7); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { - tweak0 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak1 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak2 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak3 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - - b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0; - b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0; - b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0; - b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0; + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + GEN_TWEAK (tweak2, tweak1); + GEN_TWEAK (tweak3, tweak2); + GEN_TWEAK (tweak, tweak3); + + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); + + tweak0 = asm_vperm1 (tweak0, bswap128_const); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + + b0 ^= tweak0 ^ rkey0; + b1 ^= tweak1 ^ rkey0; + b2 ^= tweak2 ^ rkey0; + b3 ^= tweak3 ^ rkey0; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - 
b3 = vec_cipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1871,15 +2199,15 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, #undef DO_ROUND rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey ^ tweak0); - b1 = vec_cipherlast_be (b1, rkey ^ tweak1); - b2 = vec_cipherlast_be (b2, rkey ^ tweak2); - b3 = vec_cipherlast_be (b3, rkey ^ tweak3); + b0 = asm_cipherlast_be (b0, rkey ^ tweak0); + b1 = asm_cipherlast_be (b1, rkey ^ tweak1); + b2 = asm_cipherlast_be (b2, rkey ^ tweak2); + b3 = asm_cipherlast_be (b3, rkey ^ tweak3); - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; @@ -1888,18 +2216,18 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, for (; nblocks; nblocks--) { - tweak = tweak_next; + tweak0 = asm_vperm1 (tweak, bswap128_const); /* Xor-Encrypt/Decrypt-Xor block. */ - b = VEC_LOAD_BE (in, bige_const) ^ tweak; + b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0; /* Generate next tweak. 
*/ - GEN_TWEAK (tweak_next, tweak_tmp); + GEN_TWEAK (tweak, tweak); AES_ENCRYPT (b, rounds); - b ^= tweak; - VEC_STORE_BE (out, b, bige_const); + b ^= tweak0; + VEC_STORE_BE (out, 0, b, bige_const); in++; out++; @@ -1919,42 +2247,70 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, for (; nblocks >= 8; nblocks -= 8) { - tweak0 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak1 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak2 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak3 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak4 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak5 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak6 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak7 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - - b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0; - b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0; - b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0; - b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0; - b4 = VEC_LOAD_BE (in + 4, bige_const) ^ tweak4 ^ rkey0; - b5 = VEC_LOAD_BE (in + 5, bige_const) ^ tweak5 ^ rkey0; - b6 = VEC_LOAD_BE (in + 6, bige_const) ^ tweak6 ^ rkey0; - b7 = VEC_LOAD_BE (in + 7, bige_const) ^ tweak7 ^ rkey0; + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + tweak0 = asm_vperm1 (tweak0, bswap128_const); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + GEN_TWEAK (tweak2, tweak1); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + GEN_TWEAK (tweak3, tweak2); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + GEN_TWEAK (tweak4, tweak3); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + b2 = 
VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + GEN_TWEAK (tweak5, tweak4); + tweak4 = asm_vperm1 (tweak4, bswap128_const); + GEN_TWEAK (tweak6, tweak5); + tweak5 = asm_vperm1 (tweak5, bswap128_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + GEN_TWEAK (tweak7, tweak6); + tweak6 = asm_vperm1 (tweak6, bswap128_const); + GEN_TWEAK (tweak, tweak7); + tweak7 = asm_vperm1 (tweak7, bswap128_const); + b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + + tweak0 = asm_xor (tweak0, rkey0); + tweak1 = asm_xor (tweak1, rkey0); + tweak2 = asm_xor (tweak2, rkey0); + tweak3 = asm_xor (tweak3, rkey0); + tweak4 = asm_xor (tweak4, rkey0); + tweak5 = asm_xor (tweak5, rkey0); + tweak6 = asm_xor (tweak6, rkey0); + tweak7 = asm_xor (tweak7, rkey0); + + b0 = asm_xor (b0, tweak0); + b1 = asm_xor (b1, tweak1); + b2 = asm_xor (b2, tweak2); + b3 = asm_xor (b3, tweak3); + b4 = asm_xor (b4, tweak4); + b5 = asm_xor (b5, tweak5); + b6 = asm_xor (b6, tweak6); + b7 = asm_xor (b7, tweak7); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_ncipher_be (b0, rkey); \ - b1 = vec_ncipher_be (b1, rkey); \ - b2 = vec_ncipher_be (b2, rkey); \ - b3 = vec_ncipher_be (b3, rkey); \ - b4 = vec_ncipher_be (b4, rkey); \ - b5 = vec_ncipher_be (b5, rkey); \ - b6 = vec_ncipher_be (b6, rkey); \ - b7 = vec_ncipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); \ + b4 = asm_ncipher_be (b4, rkey); \ + b5 = asm_ncipher_be (b5, rkey); \ + b6 = asm_ncipher_be (b6, rkey); \ + b7 = asm_ncipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1963,7 +2319,20 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + DO_ROUND(8); + + tweak0 = asm_xor (tweak0, rkeylf); + tweak1 = asm_xor (tweak1, rkeylf); + 
tweak2 = asm_xor (tweak2, rkeylf); + tweak3 = asm_xor (tweak3, rkeylf); + tweak4 = asm_xor (tweak4, rkeylf); + tweak5 = asm_xor (tweak5, rkeylf); + tweak6 = asm_xor (tweak6, rkeylf); + tweak7 = asm_xor (tweak7, rkeylf); + DO_ROUND(9); if (rounds >= 12) { @@ -1978,51 +2347,62 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_ncipherlast_be (b0, rkey ^ tweak0); - b1 = vec_ncipherlast_be (b1, rkey ^ tweak1); - b2 = vec_ncipherlast_be (b2, rkey ^ tweak2); - b3 = vec_ncipherlast_be (b3, rkey ^ tweak3); - b4 = vec_ncipherlast_be (b4, rkey ^ tweak4); - b5 = vec_ncipherlast_be (b5, rkey ^ tweak5); - b6 = vec_ncipherlast_be (b6, rkey ^ tweak6); - b7 = vec_ncipherlast_be (b7, rkey ^ tweak7); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); - - in += 8; + b0 = asm_ncipherlast_be (b0, tweak0); + b1 = asm_ncipherlast_be (b1, tweak1); + b2 = asm_ncipherlast_be (b2, tweak2); + b3 = asm_ncipherlast_be (b3, tweak3); + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_ncipherlast_be (b4, tweak4); + b5 = asm_ncipherlast_be (b5, tweak5); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_ncipherlast_be (b6, tweak6); + b7 = asm_ncipherlast_be (b7, tweak7); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + 
VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { - tweak0 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak1 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak2 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak3 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - - b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0; - b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0; - b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0; - b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0; + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + GEN_TWEAK (tweak2, tweak1); + GEN_TWEAK (tweak3, tweak2); + GEN_TWEAK (tweak, tweak3); + + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); + + tweak0 = asm_vperm1 (tweak0, bswap128_const); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + + b0 ^= tweak0 ^ rkey0; + b1 ^= tweak1 ^ rkey0; + b2 ^= tweak2 ^ rkey0; + b3 ^= tweak3 ^ rkey0; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_ncipher_be (b0, rkey); \ - b1 = vec_ncipher_be (b1, rkey); \ - b2 = vec_ncipher_be (b2, rkey); \ - b3 = vec_ncipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -2047,15 +2427,15 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, #undef DO_ROUND rkey = rkeylast; - b0 = vec_ncipherlast_be (b0, rkey ^ tweak0); - b1 = vec_ncipherlast_be (b1, rkey ^ tweak1); - b2 = vec_ncipherlast_be (b2, rkey ^ tweak2); - b3 = vec_ncipherlast_be (b3, rkey ^ tweak3); + b0 = asm_ncipherlast_be (b0, rkey ^ tweak0); + b1 = asm_ncipherlast_be (b1, rkey ^ tweak1); + b2 = asm_ncipherlast_be (b2, rkey ^ tweak2); + b3 = 
asm_ncipherlast_be (b3, rkey ^ tweak3); - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; @@ -2064,25 +2444,30 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, for (; nblocks; nblocks--) { - tweak = tweak_next; + tweak0 = asm_vperm1 (tweak, bswap128_const); /* Xor-Encrypt/Decrypt-Xor block. */ - b = VEC_LOAD_BE (in, bige_const) ^ tweak; + b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0; /* Generate next tweak. */ - GEN_TWEAK (tweak_next, tweak_tmp); + GEN_TWEAK (tweak, tweak); AES_DECRYPT (b, rounds); - b ^= tweak; - VEC_STORE_BE (out, b, bige_const); + b ^= tweak0; + VEC_STORE_BE (out, 0, b, bige_const); in++; out++; } } - VEC_STORE_BE (tweak_arg, tweak_next, bige_const); +#ifdef WORDS_BIGENDIAN + tweak = asm_vperm1 (tweak, bswap128_const); + VEC_STORE_BE (tweak_arg, 0, tweak, bige_const); +#else + VEC_STORE_BE (tweak_arg, 0, tweak, vec_tweakin_swap_const); +#endif #undef GEN_TWEAK } From f34de41b9db5412f253b9b984522427638aae115 Mon Sep 17 00:00:00 2001 From: NIIBE Yutaka Date: Thu, 16 Jan 2020 14:38:29 +0900 Subject: [PATCH 04/27] Register DCO for Tianjia Zhang. 
-- Signed-off-by: NIIBE Yutaka --- AUTHORS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/AUTHORS b/AUTHORS index 280d1303..8271c0d8 100644 --- a/AUTHORS +++ b/AUTHORS @@ -199,6 +199,9 @@ Shawn Landden Stephan Mueller 2014-08-22:2008899.25OeoelVVA@myon.chronox.de: +Tianjia Zhang +2020-01-08:dcda0127-2f45-93a3-0736-27259a33bffa@linux.alibaba.com: + TomĂ¡Å¡ MrĂ¡z 2012-04-16:1334571250.5056.52.camel@vespa.frost.loc: From d154c1e9e11019980253f0a65758932cd0656470 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Sun, 22 Dec 2019 17:20:11 +0800 Subject: [PATCH 05/27] Add new curve named sm2p256v1. * cipher/ecc-curves.c (domain_parms): Add sm2p256v1 for SM2. * tests/curves.c (N_CURVES): Update N_CURVES for SM2. Signed-off-by: Tianjia Zhang --- cipher/ecc-curves.c | 14 ++++++++++++++ tests/curves.c | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cipher/ecc-curves.c b/cipher/ecc-curves.c index 52872c5e..1592d23a 100644 --- a/cipher/ecc-curves.c +++ b/cipher/ecc-curves.c @@ -115,6 +115,8 @@ static const struct { "secp256k1", "1.3.132.0.10" }, + { "sm2p256v1", "1.2.156.10197.1.301" }, + { NULL, NULL} }; @@ -512,6 +514,18 @@ static const ecc_domain_parms_t domain_parms[] = 1 }, + { + "sm2p256v1", 256, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xfffffffeffffffffffffffffffffffffffffffff00000000ffffffffffffffff", + "0xfffffffeffffffffffffffffffffffffffffffff00000000fffffffffffffffc", + "0x28e9fa9e9d9f5e344d5a9e4bcf6509a7f39789f515ab8f92ddbcbd414d940e93", + "0xfffffffeffffffffffffffffffffffff7203df6b21c6052b53bbf40939d54123", + "0x32c4ae2c1f1981195f9904466a39c9948fe30bbff2660be1715a4589334c74c7", + "0xbc3736a2f4f6779c59bdcee36b692153d0a9877cc62a474002df32e52139f0a0", + 1 + }, + { NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL } }; diff --git a/tests/curves.c b/tests/curves.c index ff244bd1..0dfa2acb 100644 --- a/tests/curves.c +++ b/tests/curves.c @@ -33,7 +33,7 @@ #include "t-common.h" /* Number of curves defined in ../cipger/ecc-curves.c */ 
-#define N_CURVES 25 +#define N_CURVES 26 /* A real world sample public key. */ static char const sample_key_1[] = From 176a5f162acd0cfebc5517d061205681bc3658d0 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Sun, 19 Jan 2020 08:38:54 +0200 Subject: [PATCH 06/27] Update .gitignore Signed-off-by: Tianjia Zhang --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 704d3ca0..99741c18 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,8 @@ cipher/libcipher.la compat/Makefile compat/libcompat.la doc/gcrypt.info +doc/gcrypt.info-1 +doc/gcrypt.info-2 doc/stamp-vti doc/version.texi doc/Makefile @@ -65,6 +67,7 @@ src/gcrypt.h src/hmac256 src/libgcrypt-config src/libgcrypt.la +src/libgcrypt.pc src/mpicalc src/versioninfo.rc src/*.exe @@ -103,6 +106,8 @@ tests/t-lock tests/t-mpi-bit tests/t-mpi-point tests/t-sexp +tests/t-secmem +tests/t-x448 tests/tsexp tests/version tests/*.exe From 43cfc1632dd3a9579a906f31cd3b6c88d242d1a5 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Sun, 19 Jan 2020 08:38:54 +0200 Subject: [PATCH 07/27] ecc: Wrong flag and elements_enc fix. * cipher/ecc.c (ecc_generate): Fix wrong flag and elements_enc. -- Signed-off-by: Tianjia Zhang --- cipher/ecc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cipher/ecc.c b/cipher/ecc.c index 921510cc..10e11243 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -577,7 +577,7 @@ ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) (&curve_flags, NULL, ((flags & PUBKEY_FLAG_PARAM) && (flags & PUBKEY_FLAG_EDDSA))? "(flags param eddsa)" : - ((flags & PUBKEY_FLAG_PARAM) && (flags & PUBKEY_FLAG_EDDSA))? + ((flags & PUBKEY_FLAG_PARAM) && (flags & PUBKEY_FLAG_DJB_TWEAK))? "(flags param djb-tweak)" : ((flags & PUBKEY_FLAG_PARAM))? "(flags param)" : ((flags & PUBKEY_FLAG_EDDSA))? 
@@ -1712,7 +1712,7 @@ gcry_pk_spec_t _gcry_pubkey_spec_ecc = GCRY_PK_ECC, { 0, 1 }, (GCRY_PK_USAGE_SIGN | GCRY_PK_USAGE_ENCR), "ECC", ecc_names, - "pabgnhq", "pabgnhqd", "sw", "rs", "pabgnhq", + "pabgnhq", "pabgnhqd", "se", "rs", "pabgnhq", ecc_generate, ecc_check_secret_key, ecc_encrypt_raw, From 7e3aac7ba49b3b6e6c5ebe7c880b5b323c423ef7 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Sun, 19 Jan 2020 08:38:54 +0200 Subject: [PATCH 08/27] mpi: Fix error that point not uninitialized * cipher/ecc-curves.c (mpi_ec_get_elliptic_curve): Initialize E->G poing -- Signed-off-by: Tianjia Zhang --- cipher/ecc-curves.c | 1 + 1 file changed, 1 insertion(+) diff --git a/cipher/ecc-curves.c b/cipher/ecc-curves.c index 1592d23a..92850ac7 100644 --- a/cipher/ecc-curves.c +++ b/cipher/ecc-curves.c @@ -1058,6 +1058,7 @@ mpi_ec_get_elliptic_curve (elliptic_curve_t *E, int *r_flags, goto leave; if (G) { + _gcry_mpi_point_init (&E->G); mpi_point_set (&E->G, G->x, G->y, G->z); mpi_point_set (G, NULL, NULL, NULL); mpi_point_release (G); From 5ebb2f0671c902863eee91cbcfc85a72be506410 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sun, 19 Jan 2020 08:38:54 +0200 Subject: [PATCH 09/27] gcrypt.texi: fix GCRYCTL_GET_ALGO_NENCR typo * doc/gcrypt.texi: Fix GCRYCTL_GET_ALGO_NENC to GCRYCTL_GET_ALGO_NENCR. -- Signed-off-by: Jussi Kivilinna --- doc/gcrypt.texi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi index d7bfa4c2..091704de 100644 --- a/doc/gcrypt.texi +++ b/doc/gcrypt.texi @@ -2824,7 +2824,7 @@ Return the number of elements a signature created with the algorithm @var{algo} consists of. Return 0 for an unknown algorithm or for an algorithm not capable of creating signatures. -@item GCRYCTL_GET_ALGO_NENC +@item GCRYCTL_GET_ALGO_NENCR Return the number of elements a encrypted message created with the algorithm @var{algo} consists of. Return 0 for an unknown algorithm or for an algorithm not capable of encryption. 
From 4997139b3e83761c9af0246cec829305c3d7d13b Mon Sep 17 00:00:00 2001 From: NIIBE Yutaka Date: Tue, 21 Jan 2020 12:05:23 +0900 Subject: [PATCH 10/27] Avoid use of ulong in internal code. * configure.ac (HAVE_ULONG_TYPEDEF): Remove. * mpi/mpi-div.c (_gcry_mpi_fdiv_r_ui): Use unsigned long. (_gcry_mpi_divisible_ui): Likewise. * random/rndunix.c (_gcry_rndunix_gather_random): Likewise. * random/rndw32.c (_gcry_rndw32_gather_random_fast): Likewise. (ADDINT): Likewise. * random/rndw32ce.c (_gcry_rndw32ce_gather_random_fast): Likewise. * src/mpi.h: Follow the change. * src/types.h (HAVE_ULONG_TYPEDEF): Remove. Signed-off-by: NIIBE Yutaka --- configure.ac | 1 - mpi/mpi-div.c | 7 ++++--- random/rndunix.c | 4 ++-- random/rndw32.c | 4 ++-- random/rndw32ce.c | 2 +- src/mpi.h | 4 ++-- src/types.h | 6 ------ 7 files changed, 11 insertions(+), 17 deletions(-) diff --git a/configure.ac b/configure.ac index 4d4fb49a..57884162 100644 --- a/configure.ac +++ b/configure.ac @@ -834,7 +834,6 @@ AC_TYPE_PID_T GNUPG_CHECK_TYPEDEF(byte, HAVE_BYTE_TYPEDEF) GNUPG_CHECK_TYPEDEF(ushort, HAVE_USHORT_TYPEDEF) -GNUPG_CHECK_TYPEDEF(ulong, HAVE_ULONG_TYPEDEF) GNUPG_CHECK_TYPEDEF(u16, HAVE_U16_TYPEDEF) GNUPG_CHECK_TYPEDEF(u32, HAVE_U32_TYPEDEF) diff --git a/mpi/mpi-div.c b/mpi/mpi-div.c index 9ac99c31..eb621fe4 100644 --- a/mpi/mpi-div.c +++ b/mpi/mpi-div.c @@ -64,8 +64,9 @@ _gcry_mpi_fdiv_r( gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor ) * rem is optional */ -ulong -_gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend, ulong divisor ) +unsigned long +_gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend, + unsigned long divisor ) { mpi_limb_t rlimb; @@ -321,7 +322,7 @@ _gcry_mpi_tdiv_q_2exp( gcry_mpi_t w, gcry_mpi_t u, unsigned int count ) * (note: divisor must fit into a limb) */ int -_gcry_mpi_divisible_ui(gcry_mpi_t dividend, ulong divisor ) +_gcry_mpi_divisible_ui(gcry_mpi_t dividend, unsigned long divisor ) { return !_gcry_mpih_mod_1( dividend->d, dividend->nlimbs, divisor 
); } diff --git a/random/rndunix.c b/random/rndunix.c index fcb45b78..aff2f85d 100644 --- a/random/rndunix.c +++ b/random/rndunix.c @@ -894,7 +894,7 @@ _gcry_rndunix_gather_random (void (*add)(const void*, size_t, /* now read from the gatherer */ while( length ) { int goodness; - ulong subtract; + unsigned long subtract; if( read_a_msg( pipedes[0], &msg ) ) { log_error("reading from gatherer pipe failed: %s\n", @@ -928,7 +928,7 @@ _gcry_rndunix_gather_random (void (*add)(const void*, size_t, (*add)( msg.data, n, origin ); /* this is the trick how we cope with the goodness */ - subtract = (ulong)n * goodness / 100; + subtract = (unsigned long)n * goodness / 100; /* subtract at least 1 byte to avoid infinite loops */ length -= subtract ? subtract : 1; } diff --git a/random/rndw32.c b/random/rndw32.c index 08a8867d..b3f63d20 100644 --- a/random/rndw32.c +++ b/random/rndw32.c @@ -845,10 +845,10 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, We discard the upper 32-bit of those values. 
*/ { - byte buffer[20*sizeof(ulong)], *bufptr; + byte buffer[20*sizeof(unsigned long)], *bufptr; bufptr = buffer; -#define ADDINT(f) do { ulong along = (ulong)(f); \ +#define ADDINT(f) do { unsigned long along = (unsigned long)(f); \ memcpy (bufptr, &along, sizeof (along) ); \ bufptr += sizeof (along); \ } while (0) diff --git a/random/rndw32ce.c b/random/rndw32ce.c index b485eef2..873e8460 100644 --- a/random/rndw32ce.c +++ b/random/rndw32ce.c @@ -115,7 +115,7 @@ _gcry_rndw32ce_gather_random_fast (void (*add)(const void*, size_t, memcpy (bufptr, &along, sizeof (along)); \ bufptr += sizeof (along); \ } while (0) - unsigned char buffer[20*sizeof(ulong)], *bufptr; + unsigned char buffer[20*sizeof(unsigned long)], *bufptr; bufptr = buffer; ADD (HWND, GetActiveWindow ()); diff --git a/src/mpi.h b/src/mpi.h index c342ff48..39312fc3 100644 --- a/src/mpi.h +++ b/src/mpi.h @@ -182,14 +182,14 @@ gpg_err_code_t _gcry_mpi_to_octet_string (unsigned char **r_frame, #define mpi_tdiv_q_2exp(a,b,c) _gcry_mpi_tdiv_q_2exp((a),(b),(c)) #define mpi_divisible_ui(a,b) _gcry_mpi_divisible_ui((a),(b)) -ulong _gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend, ulong divisor ); +unsigned long _gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend, unsigned long divisor ); void _gcry_mpi_fdiv_r( gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor ); void _gcry_mpi_fdiv_q( gcry_mpi_t quot, gcry_mpi_t dividend, gcry_mpi_t divisor ); void _gcry_mpi_fdiv_qr( gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor ); void _gcry_mpi_tdiv_r( gcry_mpi_t rem, gcry_mpi_t num, gcry_mpi_t den); void _gcry_mpi_tdiv_qr( gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t num, gcry_mpi_t den); void _gcry_mpi_tdiv_q_2exp( gcry_mpi_t w, gcry_mpi_t u, unsigned count ); -int _gcry_mpi_divisible_ui(gcry_mpi_t dividend, ulong divisor ); +int _gcry_mpi_divisible_ui(gcry_mpi_t dividend, unsigned long divisor ); /*-- mpi-mod.c --*/ diff --git a/src/types.h b/src/types.h index 645ddd62..39393be1 
100644 --- a/src/types.h +++ b/src/types.h @@ -70,12 +70,6 @@ # define HAVE_USHORT_TYPEDEF #endif -#ifndef HAVE_ULONG_TYPEDEF -# undef ulong /* In case there is a macro with that name. */ - typedef unsigned long ulong; -# define HAVE_ULONG_TYPEDEF -#endif - #ifndef HAVE_U16_TYPEDEF # undef u16 /* In case there is a macro with that name. */ # if SIZEOF_UNSIGNED_INT == 2 From 95e9cee802419adf6f4b01b29d7874793004fa8d Mon Sep 17 00:00:00 2001 From: NIIBE Yutaka Date: Tue, 21 Jan 2020 13:16:47 +0900 Subject: [PATCH 11/27] tests: Fix check_pubkey. * tests/basic.c (check_pubkey): Fix constants of pubkeys. Signed-off-by: Tianjia Zhang Co-authored-by: NIIBE Yutaka --- tests/basic.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/basic.c b/tests/basic.c index 8337bcfb..a3454abf 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -12984,7 +12984,7 @@ check_pubkey (void) { static const test_spec_pubkey_t pubkeys[] = { { - GCRY_PK_RSA, FLAG_CRYPT | FLAG_SIGN, + GCRY_PK_RSA, FLAG_CRYPT | FLAG_SIGN | FLAG_GRIP, { "(private-key\n" " (rsa\n" @@ -13022,7 +13022,7 @@ check_pubkey (void) "\xa2\x5d\x3d\x69\xf8\x6d\x37\xa4\xf9\x39"} }, { - GCRY_PK_DSA, FLAG_SIGN, + GCRY_PK_DSA, FLAG_SIGN | FLAG_GRIP, { "(private-key\n" " (DSA\n" @@ -13067,7 +13067,7 @@ check_pubkey (void) "\x4a\xa6\xf9\xeb\x23\xbf\xa9\x12\x2d\x5b" } }, { - GCRY_PK_ELG, FLAG_SIGN | FLAG_CRYPT, + GCRY_PK_ELG, FLAG_SIGN | FLAG_CRYPT | FLAG_GRIP, { "(private-key\n" " (ELG\n" @@ -13246,7 +13246,7 @@ check_pubkey (void) " 4DDFF75C45415C1D9DD9DD33612CD530EFE137C7C90CD4" " 0B0F5621DC3AC1B751CFA0E2634FA0503B3D52639F5D7F" " B72AFD61EA199441D943FFE7F0C70A2759A3CDB84C114E" - " 1F9339FDF27F35ECA93677BEEC#)))\n" + " 1F9339FDF27F35ECA93677BEEC#)))\n", "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" } @@ -13268,7 +13268,7 @@ check_pubkey (void) " (curve secp256k1)\n" " (q #0439A36013301597DAEF41FBE593A02CC513D0B55527EC2D" " 
F1050E2E8FF49C85C23CBE7DED0E7CE6A594896B8F62888F" - " DBC5C8821305E2EA42BF01E37300116281#)))\n" + " DBC5C8821305E2EA42BF01E37300116281#)))\n", "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" } From 8d9958910e54f3fecbab6e133c3971843f6ef310 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 20 Jan 2020 11:35:28 +0800 Subject: [PATCH 12/27] ecc: Simplify signature code * cipher/ecc-gost.c (_gcry_ecc_gost_sign): Use implemented function. * cipher/ecc.c (ecc_verify): Remove redundant code. -- Signed-off-by: Tianjia Zhang --- cipher/ecc-gost.c | 18 ++++-------------- cipher/ecc.c | 22 +--------------------- 2 files changed, 5 insertions(+), 35 deletions(-) diff --git a/cipher/ecc-gost.c b/cipher/ecc-gost.c index e9dfc597..44654a47 100644 --- a/cipher/ecc-gost.c +++ b/cipher/ecc-gost.c @@ -45,8 +45,7 @@ _gcry_ecc_gost_sign (gcry_mpi_t input, mpi_ec_t ec, gcry_mpi_t k, dr, sum, ke, x, e; mpi_point_struct I; gcry_mpi_t hash; - const void *abuf; - unsigned int abits, qbits; + unsigned int qbits; if (DBG_CIPHER) log_mpidump ("gost sign hash ", input ); @@ -54,18 +53,9 @@ _gcry_ecc_gost_sign (gcry_mpi_t input, mpi_ec_t ec, qbits = mpi_get_nbits (ec->n); /* Convert the INPUT into an MPI if needed. 
*/ - if (mpi_is_opaque (input)) - { - abuf = mpi_get_opaque (input, &abits); - rc = _gcry_mpi_scan (&hash, GCRYMPI_FMT_USG, abuf, (abits+7)/8, NULL); - if (rc) - return rc; - if (abits > qbits) - mpi_rshift (hash, hash, abits - qbits); - } - else - hash = input; - + rc = _gcry_dsa_normalize_hash (input, &hash, qbits); + if (rc) + return rc; k = NULL; dr = mpi_alloc (0); diff --git a/cipher/ecc.c b/cipher/ecc.c index 10e11243..1195f5ec 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -813,27 +813,7 @@ ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms) } else { - if (mpi_is_opaque (data)) - { - const void *abuf; - unsigned int abits, qbits; - gcry_mpi_t a; - - qbits = mpi_get_nbits (ec->n); - - abuf = mpi_get_opaque (data, &abits); - rc = _gcry_mpi_scan (&a, GCRYMPI_FMT_USG, abuf, (abits+7)/8, NULL); - if (!rc) - { - if (abits > qbits) - mpi_rshift (a, a, abits - qbits); - - rc = _gcry_ecc_ecdsa_verify (a, ec, sig_r, sig_s); - _gcry_mpi_release (a); - } - } - else - rc = _gcry_ecc_ecdsa_verify (data, ec, sig_r, sig_s); + rc = _gcry_ecc_ecdsa_verify (data, ec, sig_r, sig_s); } leave: From 6b55246c77089dd372eb1807808111660fd789c7 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 20 Jan 2020 11:42:24 +0800 Subject: [PATCH 13/27] Add elliptic curve SM2 implementation. * configure.ac (enabled_pubkey_ciphers): Add ecc-sm2. * cipher/Makefile.am (EXTRA_libcipher_la_SOURCES): Add ecc-sm2.c. * cipher/pubkey-util.c (_gcry_pk_util_parse_flaglist, _gcry_pk_util_preparse_sigval): Add sm2 flags. * cipher/ecc.c: Support ecc-sm2. * cipher/ecc-common.h: Add declarations for ecc-sm2. * cipher/ecc-sm2.c: New. * src/cipher.h: Define PUBKEY_FLAG_SM2. 
-- Signed-off-by: Tianjia Zhang --- cipher/Makefile.am | 2 +- cipher/ecc-common.h | 12 + cipher/ecc-sm2.c | 566 +++++++++++++++++++++++++++++++++++++++++++ cipher/ecc.c | 53 +++- cipher/pubkey-util.c | 7 + configure.ac | 3 +- src/cipher.h | 1 + 7 files changed, 630 insertions(+), 14 deletions(-) create mode 100644 cipher/ecc-sm2.c diff --git a/cipher/Makefile.am b/cipher/Makefile.am index 020a9616..10a5ab62 100644 --- a/cipher/Makefile.am +++ b/cipher/Makefile.am @@ -87,7 +87,7 @@ EXTRA_libcipher_la_SOURCES = \ dsa.c \ elgamal.c \ ecc.c ecc-curves.c ecc-misc.c ecc-common.h \ - ecc-ecdh.c ecc-ecdsa.c ecc-eddsa.c ecc-gost.c \ + ecc-ecdh.c ecc-ecdsa.c ecc-eddsa.c ecc-gost.c ecc-sm2.c \ idea.c \ gost28147.c gost.h \ gostr3411-94.c \ diff --git a/cipher/ecc-common.h b/cipher/ecc-common.h index 7fbc950a..b8b7c763 100644 --- a/cipher/ecc-common.h +++ b/cipher/ecc-common.h @@ -125,4 +125,16 @@ gpg_err_code_t _gcry_ecc_gost_verify (gcry_mpi_t input, mpi_ec_t ec, gcry_mpi_t r, gcry_mpi_t s); +/*-- ecc-sm2.c --*/ +gpg_err_code_t _gcry_ecc_sm2_encrypt (gcry_sexp_t *r_ciph, + gcry_mpi_t input, mpi_ec_t ec); +gpg_err_code_t _gcry_ecc_sm2_decrypt (gcry_sexp_t *r_plain, + gcry_sexp_t data_list, mpi_ec_t ec); +gpg_err_code_t _gcry_ecc_sm2_sign (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s, + int flags, int hashalgo); +gpg_err_code_t _gcry_ecc_sm2_verify (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s); + + #endif /*GCRY_ECC_COMMON_H*/ diff --git a/cipher/ecc-sm2.c b/cipher/ecc-sm2.c new file mode 100644 index 00000000..a6341132 --- /dev/null +++ b/cipher/ecc-sm2.c @@ -0,0 +1,566 @@ +/* ecc-sm2.c - Elliptic Curve SM2 implementation + * Copyright (C) 2020 Tianjia Zhang + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "bithelp.h" +#include "mpi.h" +#include "cipher.h" +#include "context.h" +#include "ec-context.h" +#include "pubkey-internal.h" +#include "ecc-common.h" + +#define MPI_NBYTES(m) ((mpi_get_nbits(m) + 7) / 8) + + +/* Key derivation function from X9.63/SECG */ +static gpg_err_code_t +kdf_x9_63 (int algo, const void *in, size_t inlen, void *out, size_t outlen) +{ + gpg_err_code_t rc; + gcry_md_hd_t hd; + int mdlen; + u32 counter = 1; + u32 counter_be; + unsigned char *dgst; + unsigned char *pout = out; + size_t rlen = outlen; + size_t len; + + rc = _gcry_md_open (&hd, algo, 0); + if (rc) + return rc; + + mdlen = _gcry_md_get_algo_dlen (algo); + + while (rlen > 0) + { + counter_be = be_bswap32 (counter); /* cpu_to_be32 */ + counter++; + + _gcry_md_write (hd, in, inlen); + _gcry_md_write (hd, &counter_be, sizeof(counter_be)); + + dgst = _gcry_md_read (hd, algo); + if (dgst == NULL) + { + rc = GPG_ERR_DIGEST_ALGO; + break; + } + + len = mdlen < rlen ? 
mdlen : rlen; /* min(mdlen, rlen) */ + memcpy (pout, dgst, len); + rlen -= len; + pout += len; + + _gcry_md_reset (hd); + } + + _gcry_md_close (hd); + return rc; +} + + +/* _gcry_ecc_sm2_encrypt description: + * input: + * data[0] : octet string + * output: A new S-expression with the parameters: + * a: c1 : generated ephemeral public key (kG) + * b: c3 : Hash(x2 || IN || y2) + * c: c2 : cipher + * + * sm2_decrypt description: + * in contrast to encrypt + */ +gpg_err_code_t +_gcry_ecc_sm2_encrypt (gcry_sexp_t *r_ciph, gcry_mpi_t input, mpi_ec_t ec) +{ + gpg_err_code_t rc; + const int algo = GCRY_MD_SM3; + gcry_md_hd_t md = NULL; + int mdlen; + unsigned char *dgst; + gcry_mpi_t k = NULL; + mpi_point_struct kG, kP; + gcry_mpi_t x1, y1; + gcry_mpi_t x2, y2; + gcry_mpi_t x2y2 = NULL; + unsigned char *in = NULL; + unsigned int inlen; + unsigned char *raw; + unsigned int rawlen; + unsigned char *cipher = NULL; + int i; + + point_init (&kG); + point_init (&kP); + x1 = mpi_new (0); + y1 = mpi_new (0); + x2 = mpi_new (0); + y2 = mpi_new (0); + + in = _gcry_mpi_get_buffer (input, 0, &inlen, NULL); + if (!in) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + + cipher = xtrymalloc (inlen); + if (!cipher) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + + /* rand k in [1, n-1] */ + k = _gcry_dsa_gen_k (ec->n, GCRY_VERY_STRONG_RANDOM); + + /* [k]G = (x1, y1) */ + _gcry_mpi_ec_mul_point (&kG, k, ec->G, ec); + if (_gcry_mpi_ec_get_affine (x1, y1, &kG, ec)) + { + if (DBG_CIPHER) + log_debug ("Bad check: kG can not be a Point at Infinity!\n"); + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* [k]P = (x2, y2) */ + _gcry_mpi_ec_mul_point (&kP, k, ec->Q, ec); + if (_gcry_mpi_ec_get_affine (x2, y2, &kP, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* t = KDF(x2 || y2, klen) */ + x2y2 = _gcry_mpi_ec_ec2os (&kP, ec); + raw = mpi_get_opaque (x2y2, &rawlen); + rawlen = (rawlen + 7) / 8; + + /* skip the prefix '0x04' */ + raw += 1; + rawlen -= 1; + rc = 
kdf_x9_63 (algo, raw, rawlen, cipher, inlen); + if (rc) + goto leave; + + /* cipher = t xor in */ + for (i = 0; i < inlen; i++) + cipher[i] ^= in[i]; + + /* hash(x2 || IN || y2) */ + mdlen = _gcry_md_get_algo_dlen (algo); + rc = _gcry_md_open (&md, algo, 0); + if (rc) + goto leave; + _gcry_md_write (md, raw, MPI_NBYTES(x2)); + _gcry_md_write (md, in, inlen); + _gcry_md_write (md, raw + MPI_NBYTES(x2), MPI_NBYTES(y2)); + dgst = _gcry_md_read (md, algo); + if (dgst == NULL) + { + rc = GPG_ERR_DIGEST_ALGO; + goto leave; + } + + if (!rc) + { + gcry_mpi_t c1; + gcry_mpi_t c3; + gcry_mpi_t c2; + + c3 = mpi_new (0); + c2 = mpi_new (0); + + c1 = _gcry_ecc_ec2os (x1, y1, ec->p); + _gcry_mpi_set_opaque_copy (c3, dgst, mdlen * 8); + _gcry_mpi_set_opaque_copy (c2, cipher, inlen * 8); + + rc = sexp_build (r_ciph, NULL, + "(enc-val(flags sm2)(sm2(a%M)(b%M)(c%M)))", + c1, c3, c2); + + mpi_free (c1); + mpi_free (c3); + mpi_free (c2); + } + +leave: + _gcry_md_close (md); + mpi_free (x2y2); + mpi_free (k); + + point_free (&kG); + point_free (&kP); + mpi_free (x1); + mpi_free (y1); + mpi_free (x2); + mpi_free (y2); + + xfree (cipher); + xfree (in); + + return rc; +} + + +gpg_err_code_t +_gcry_ecc_sm2_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t data_list, mpi_ec_t ec) +{ + gpg_err_code_t rc; + gcry_mpi_t data_c1 = NULL; + gcry_mpi_t data_c3 = NULL; + gcry_mpi_t data_c2 = NULL; + + /* + * Extract the data. 
+ */ + rc = sexp_extract_param (data_list, NULL, "/a/b/c", + &data_c1, &data_c3, &data_c2, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_printmpi ("ecc_decrypt d_c1", data_c1); + log_printmpi ("ecc_decrypt d_c3", data_c3); + log_printmpi ("ecc_decrypt d_c2", data_c2); + } + + { + const int algo = GCRY_MD_SM3; + gcry_md_hd_t md = NULL; + int mdlen; + unsigned char *dgst; + mpi_point_struct c1; + mpi_point_struct kP; + gcry_mpi_t x2, y2; + gcry_mpi_t x2y2 = NULL; + unsigned char *in = NULL; + unsigned int inlen; + unsigned char *plain = NULL; + unsigned char *raw; + unsigned int rawlen; + unsigned char *c3 = NULL; + unsigned int c3_len; + int i; + + point_init (&c1); + point_init (&kP); + x2 = mpi_new (0); + y2 = mpi_new (0); + + in = mpi_get_opaque (data_c2, &inlen); + inlen = (inlen + 7) / 8; + plain = xtrymalloc (inlen); + if (!plain) + { + rc = gpg_err_code_from_syserror (); + goto leave_main; + } + + rc = _gcry_ecc_os2ec (&c1, data_c1); + if (rc) + goto leave_main; + + if (!_gcry_mpi_ec_curve_point (&c1, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave_main; + } + + /* [d]C1 = (x2, y2), C1 = [k]G */ + _gcry_mpi_ec_mul_point (&kP, ec->d, &c1, ec); + if (_gcry_mpi_ec_get_affine (x2, y2, &kP, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave_main; + } + + /* t = KDF(x2 || y2, inlen) */ + x2y2 = _gcry_mpi_ec_ec2os (&kP, ec); + raw = mpi_get_opaque (x2y2, &rawlen); + rawlen = (rawlen + 7) / 8; + /* skip the prefix '0x04' */ + raw += 1; + rawlen -= 1; + rc = kdf_x9_63 (algo, raw, rawlen, plain, inlen); + if (rc) + goto leave_main; + + /* plain = C2 xor t */ + for (i = 0; i < inlen; i++) + plain[i] ^= in[i]; + + /* Hash(x2 || IN || y2) == C3 */ + mdlen = _gcry_md_get_algo_dlen (algo); + rc = _gcry_md_open (&md, algo, 0); + if (rc) + goto leave_main; + _gcry_md_write (md, raw, MPI_NBYTES(x2)); + _gcry_md_write (md, plain, inlen); + _gcry_md_write (md, raw + MPI_NBYTES(x2), MPI_NBYTES(y2)); + dgst = _gcry_md_read (md, algo); + if (dgst == NULL) + { + memset 
(plain, 0, inlen); + rc = GPG_ERR_DIGEST_ALGO; + goto leave_main; + } + c3 = mpi_get_opaque (data_c3, &c3_len); + c3_len = (c3_len + 7) / 8; + if (c3_len != mdlen || memcmp (dgst, c3, c3_len) != 0) + { + memset (plain, 0, inlen); + rc = GPG_ERR_INV_DATA; + goto leave_main; + } + + if (!rc) + { + gcry_mpi_t r; + + r = mpi_new (inlen * 8); + _gcry_mpi_set_buffer (r, plain, inlen, 0); + + rc = sexp_build (r_plain, NULL, "(value %m)", r); + + mpi_free (r); + } + + leave_main: + _gcry_md_close (md); + mpi_free (x2y2); + xfree (plain); + + point_free (&c1); + point_free (&kP); + mpi_free (x2); + mpi_free (y2); + } + + leave: + _gcry_mpi_release (data_c1); + _gcry_mpi_release (data_c3); + _gcry_mpi_release (data_c2); + + return rc; +} + + +/* Compute an SM2 signature. + * Return the signature struct (r,s) from the message hash. The caller + * must have allocated R and S. + */ +gpg_err_code_t +_gcry_ecc_sm2_sign (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s, + int flags, int hashalgo) +{ + gpg_err_code_t rc = 0; + int extraloops = 0; + gcry_mpi_t hash; + const void *abuf; + unsigned int abits, qbits; + gcry_mpi_t tmp = NULL; + gcry_mpi_t k = NULL; + gcry_mpi_t rk = NULL; + mpi_point_struct kG; + gcry_mpi_t x1; + + if (DBG_CIPHER) + log_mpidump ("sm2 sign hash ", input); + + qbits = mpi_get_nbits (ec->n); + + /* Convert the INPUT into an MPI if needed. */ + rc = _gcry_dsa_normalize_hash (input, &hash, qbits); + if (rc) + return rc; + + point_init (&kG); + x1 = mpi_new (0); + rk = mpi_new (0); + tmp = mpi_new (0); + + for (;;) + { + /* rand k in [1, n-1] */ + if ((flags & PUBKEY_FLAG_RFC6979) && hashalgo) + { + /* Use Pornin's method for deterministic DSA. If this + flag is set, it is expected that HASH is an opaque + MPI with the to be signed hash. That hash is also + used as h1 from 3.2.a. 
*/ + if (!mpi_is_opaque (input)) + { + rc = GPG_ERR_CONFLICT; + goto leave; + } + + abuf = mpi_get_opaque (input, &abits); + rc = _gcry_dsa_gen_rfc6979_k (&k, ec->n, ec->d, + abuf, (abits+7)/8, + hashalgo, extraloops); + if (rc) + goto leave; + extraloops++; + } + else + k = _gcry_dsa_gen_k (ec->n, GCRY_VERY_STRONG_RANDOM); + + _gcry_dsa_modify_k (k, ec->n, qbits); + + /* [k]G = (x1, y1) */ + _gcry_mpi_ec_mul_point (&kG, k, ec->G, ec); + if (_gcry_mpi_ec_get_affine (x1, NULL, &kG, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* r = (e + x1) % n */ + mpi_addm (r, hash, x1, ec->n); + + /* r != 0 && r + k != n */ + if (mpi_cmp_ui (r, 0) == 0) + continue; + mpi_add (rk, r, k); + if (mpi_cmp (rk, ec->n) == 0) + continue; + + /* s = ((d + 1)^-1 * (k - rd)) % n */ + mpi_addm (s, ec->d, GCRYMPI_CONST_ONE, ec->n); + mpi_invm (s, s, ec->n); + mpi_mulm (tmp, r, ec->d, ec->n); + mpi_subm (tmp, k, tmp, ec->n); + mpi_mulm (s, s, tmp, ec->n); + + /* s != 0 */ + if (mpi_cmp_ui (s, 0) == 0) + continue; + + break; /* Okay */ + } + + if (DBG_CIPHER) + { + log_mpidump ("sm2 sign result r ", r); + log_mpidump ("sm2 sign result s ", s); + } + +leave: + point_free (&kG); + mpi_free (k); + mpi_free (x1); + mpi_free (rk); + mpi_free (tmp); + + if (hash != input) + mpi_free (hash); + + return rc; +} + + +/* Verify an SM2 signature. + * Check if R and S verifies INPUT. 
+ */ +gpg_err_code_t +_gcry_ecc_sm2_verify (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s) +{ + gpg_err_code_t err = 0; + gcry_mpi_t hash = NULL; + gcry_mpi_t t = NULL; + mpi_point_struct sG, tP; + gcry_mpi_t x1, y1; + unsigned int nbits; + + /* r, s within [1, n-1] */ + if (mpi_cmp_ui (r, 1) < 0 || mpi_cmp (r, ec->n) > 0) + return GPG_ERR_BAD_SIGNATURE; + if (mpi_cmp_ui (s, 1) < 0 || mpi_cmp (s, ec->n) > 0) + return GPG_ERR_BAD_SIGNATURE; + + nbits = mpi_get_nbits (ec->n); + err = _gcry_dsa_normalize_hash (input, &hash, nbits); + if (err) + return err; + + point_init (&sG); + point_init (&tP); + x1 = mpi_new (0); + y1 = mpi_new (0); + t = mpi_new (0); + + /* t = (r + s) % n, t != 0 */ + mpi_addm (t, r, s, ec->n); + if (mpi_cmp_ui (t, 0) == 0) + { + err = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + + /* sG + tP = (x1, y1) */ + _gcry_mpi_ec_mul_point (&sG, s, ec->G, ec); + _gcry_mpi_ec_mul_point (&tP, t, ec->Q, ec); + _gcry_mpi_ec_add_points (&sG, &sG, &tP, ec); + if (_gcry_mpi_ec_get_affine (x1, y1, &sG, ec)) + { + err = GPG_ERR_INV_DATA; + goto leave; + } + + /* R = (e + x1) % n */ + mpi_addm (t, hash, x1, ec->n); + + /* check R == r */ + if (mpi_cmp (t, r)) + { + if (DBG_CIPHER) + { + log_mpidump (" R", t); + log_mpidump (" r", r); + log_mpidump (" s", s); + } + err = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + if (DBG_CIPHER) + log_debug ("sm2 verify: Accepted\n"); + + leave: + point_free (&sG); + point_free (&tP); + mpi_free (x1); + mpi_free (y1); + mpi_free (t); + if (hash != input) + mpi_free (hash); + + return err; +} diff --git a/cipher/ecc.c b/cipher/ecc.c index 1195f5ec..49c2c0f6 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -69,6 +69,7 @@ static const char *ecc_names[] = "ecdh", "eddsa", "gost", + "sm2", NULL, }; @@ -723,6 +724,14 @@ ecc_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms) rc = sexp_build (r_sig, NULL, "(sig-val(gost(r%M)(s%M)))", sig_r, sig_s); } + else if ((ctx.flags & PUBKEY_FLAG_SM2)) + { + rc = 
_gcry_ecc_sm2_sign (data, ec, sig_r, sig_s, + ctx.flags, ctx.hash_algo); + if (!rc) + rc = sexp_build (r_sig, NULL, + "(sig-val(sm2(r%M)(s%M)))", sig_r, sig_s); + } else { rc = _gcry_ecc_ecdsa_sign (data, ec, sig_r, sig_s, @@ -811,6 +820,10 @@ ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms) { rc = _gcry_ecc_gost_verify (data, ec, sig_r, sig_s); } + else if ((sigflags & PUBKEY_FLAG_SM2)) + { + rc = _gcry_ecc_sm2_verify (data, ec, sig_r, sig_s); + } else { rc = _gcry_ecc_ecdsa_verify (data, ec, sig_r, sig_s); @@ -918,6 +931,13 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) goto leave; } + if ((ctx.flags & PUBKEY_FLAG_SM2)) + { + /* All encryption will be done, return it. */ + rc = _gcry_ecc_sm2_encrypt (r_ciph, data, ec); + goto leave; + } + /* The following is false: assert( mpi_cmp_ui( R.x, 1 )==0 );, so */ { mpi_point_struct R; /* Result that we return. */ @@ -1041,18 +1061,6 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_DECRYPT, (nbits = ecc_get_nbits (keyparms))); - /* - * Extract the data. - */ - rc = _gcry_pk_util_preparse_encval (s_data, ecc_names, &l1, &ctx); - if (rc) - goto leave; - rc = sexp_extract_param (l1, NULL, "/e", &data_e, NULL); - if (rc) - goto leave; - if (DBG_CIPHER) - log_printmpi ("ecc_decrypt d_e", data_e); - /* * Extract the key. */ @@ -1066,6 +1074,27 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) goto leave; } + /* + * Extract the data. + */ + rc = _gcry_pk_util_preparse_encval (s_data, ecc_names, &l1, &ctx); + if (rc) + goto leave; + if ((ctx.flags & PUBKEY_FLAG_SM2)) + { + /* All decryption will be done, return it. 
*/ + rc = _gcry_ecc_sm2_decrypt (r_plain, l1, ec); + goto leave; + } + else + { + rc = sexp_extract_param (l1, NULL, "/e", &data_e, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + log_printmpi ("ecc_decrypt d_e", data_e); + } + if (ec->dialect == ECC_DIALECT_SAFECURVE || (flags & PUBKEY_FLAG_DJB_TWEAK)) enable_specific_point_validation = 1; else diff --git a/cipher/pubkey-util.c b/cipher/pubkey-util.c index 4a6bf462..c52185de 100644 --- a/cipher/pubkey-util.c +++ b/cipher/pubkey-util.c @@ -81,6 +81,11 @@ _gcry_pk_util_parse_flaglist (gcry_sexp_t list, encoding = PUBKEY_ENC_RAW; flags |= PUBKEY_FLAG_RAW_FLAG; /* Explicitly given. */ } + else if (!memcmp (s, "sm2", 3)) + { + encoding = PUBKEY_ENC_RAW; + flags |= PUBKEY_FLAG_SM2 | PUBKEY_FLAG_RAW_FLAG; + } else if (!igninvflag) rc = GPG_ERR_INV_FLAG; break; @@ -429,6 +434,8 @@ _gcry_pk_util_preparse_sigval (gcry_sexp_t s_sig, const char **algo_names, *r_eccflags = PUBKEY_FLAG_EDDSA; if (!strcmp (name, "gost")) *r_eccflags = PUBKEY_FLAG_GOST; + if (!strcmp (name, "sm2")) + *r_eccflags = PUBKEY_FLAG_SM2; } *r_parms = l2; diff --git a/configure.ac b/configure.ac index 57884162..74ca9be0 100644 --- a/configure.ac +++ b/configure.ac @@ -2545,7 +2545,8 @@ LIST_MEMBER(ecc, $enabled_pubkey_ciphers) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS \ ecc.lo ecc-curves.lo ecc-misc.lo \ - ecc-ecdh.lo ecc-ecdsa.lo ecc-eddsa.lo ecc-gost.lo" + ecc-ecdh.lo ecc-ecdsa.lo ecc-eddsa.lo ecc-gost.lo \ + ecc-sm2.lo" AC_DEFINE(USE_ECC, 1, [Defined if this module should be included]) fi diff --git a/src/cipher.h b/src/cipher.h index 5aac19f1..1fe50890 100644 --- a/src/cipher.h +++ b/src/cipher.h @@ -42,6 +42,7 @@ #define PUBKEY_FLAG_GOST (1 << 13) #define PUBKEY_FLAG_NO_KEYTEST (1 << 14) #define PUBKEY_FLAG_DJB_TWEAK (1 << 15) +#define PUBKEY_FLAG_SM2 (1 << 16) enum pk_operation From aa9c78afa1d867bb7b9b3c695cf31a832c9419e5 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 20 Jan 2020 11:42:25 +0800 
Subject: [PATCH 14/27] tests: Add basic test cases for sm2 * tests/basic.c (check_pubkey): Add test cases for ecc-sm2. -- Original change was modified by gniibe to limit only for ECDSA. Signed-off-by: Tianjia Zhang --- tests/basic.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/basic.c b/tests/basic.c index a3454abf..beb5a5b2 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -12594,6 +12594,16 @@ check_pubkey_sign_ecdsa (int n, gcry_sexp_t skey, gcry_sexp_t pkey) /* */ "000102030405060708090A0B0C0D0E0F#))", 0 }, + { 256, + "(data (flags sm2)\n" + " (hash sm3 #112233445566778899AABBCCDDEEFF00" + /* */ "123456789ABCDEF0123456789ABCDEF0#))", + 0, + "(data (flags sm2)\n" + " (hash sm3 #B524F552CD82B8B028476E005C377FB1" + /* */ "9A87E6FC682D48BB5D42E3D9B9EFFE76#))", + 0 + }, { 0, NULL } }; @@ -13270,6 +13280,29 @@ check_pubkey (void) " F1050E2E8FF49C85C23CBE7DED0E7CE6A594896B8F62888F" " DBC5C8821305E2EA42BF01E37300116281#)))\n", + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" } + }, + { /* sm2 test */ + GCRY_PK_ECDSA, FLAG_SIGN, + { + "(private-key\n" + " (ecc\n" + " (curve sm2p256v1)\n" + " (q #04" + " 8759389A34AAAD07ECF4E0C8C2650A4459C8D926EE2378324E0261C52538CB47" + " 7528106B1E0B7C8DD5FF29A9C86A89065656EB33154BC0556091EF8AC9D17D78#)" + " (d #41EBDBA9C98CBECCE7249CF18BFD427FF8EA0B2FAB7B9D305D9D9BF4DB6ADFC2#)" + "))", + + "(public-key\n" + " (ecc\n" + " (curve sm2p256v1)\n" + " (q #04" + " 8759389A34AAAD07ECF4E0C8C2650A4459C8D926EE2378324E0261C52538CB47" + " 7528106B1E0B7C8DD5FF29A9C86A89065656EB33154BC0556091EF8AC9D17D78#)" + "))", + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" } } From 79ed620ec46adbb08f5cea6a4865a95a436e4109 Mon Sep 17 00:00:00 2001 From: Marvin W Date: Wed, 22 Jan 2020 19:36:13 +0200 Subject: [PATCH 15/27] Set vZZ.16b register to zero before use in armv8 gcm implementation * cipher/cipher-gcm-armv8-aarch64-ce.S 
(_gcry_ghash_setup_armv8_ce_pmull): Set vZZ to zero. -- Reported by "Marvin W." at https://dev.gnupg.org/D497: > > The register vZZ.16b is expected to be always 0 throughout the macros > in cipher/cipher-gcm-armv8-aarch64-ce.S. The PMUL_128x128 and REDUCTION > macros are used in gcry_ghash_setup_armv8_ce_pmull function, however that > function does not set vZZ.16b to zero. If previous use left `vZZ.16b > non-zero before gcry_ghash_setup_armv8_ce_pmull is called, this will cause > invalid GCM auth tag results. > > The patch resets vZZ.16b to 0 at the beginning of > gcry_ghash_setup_armv8_ce_pmull. > [jk: from differential web-ui to commit] Signed-off-by: Jussi Kivilinna --- cipher/cipher-gcm-armv8-aarch64-ce.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cipher/cipher-gcm-armv8-aarch64-ce.S b/cipher/cipher-gcm-armv8-aarch64-ce.S index b0c2cccc..877207d3 100644 --- a/cipher/cipher-gcm-armv8-aarch64-ce.S +++ b/cipher/cipher-gcm-armv8-aarch64-ce.S @@ -385,6 +385,8 @@ _gcry_ghash_setup_armv8_ce_pmull: GET_DATA_POINTER(x2, .Lrconst) + eor vZZ.16b, vZZ.16b, vZZ.16b + /* H¹ */ ld1 {rh1.16b}, [x0] rbit rh1.16b, rh1.16b From 4aa8ff904262f331abbb8c988069a7029ca13502 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Wed, 22 Jan 2020 19:36:13 +0200 Subject: [PATCH 16/27] tests/basic: add vector cluttering to detect implementation bugs * src/global.c (_gcry_check_version): Fix missing newline. * tests/basic.c (ALWAYS_INLINE, CLUTTER_VECTOR_REGISTER_*, prepare_vector_data) (clutter_vector_registers): New. (progress_handler): Make static function. (check_bulk_cipher_modes, check_one_cipher_core_reset) (check_one_cipher_core, check_one_md, check_one_md_multi) (check_one_md_final, check_one_mac): Clutter vector registers before gcry_* calls to cipher/md/mac algorithms.
-- Signed-off-by: Jussi Kivilinna --- src/global.c | 3 +- tests/basic.c | 290 +++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 290 insertions(+), 3 deletions(-) diff --git a/src/global.c b/src/global.c index d82c680a..be65df54 100644 --- a/src/global.c +++ b/src/global.c @@ -261,7 +261,8 @@ _gcry_check_version (const char *req_version) /* Compare version numbers. */ if ( my_major > rq_major || (my_major == rq_major && my_minor > rq_minor) - || (my_major == rq_major && my_minor == rq_minor && my_micro > rq_micro) + || (my_major == rq_major && my_minor == rq_minor + && my_micro > rq_micro) || (my_major == rq_major && my_minor == rq_minor && my_micro == rq_micro)) { diff --git a/tests/basic.c b/tests/basic.c index beb5a5b2..812bd89d 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -33,6 +33,10 @@ #define PGM "basic" #include "t-common.h" +#if __GNUC__ >= 4 +# define ALWAYS_INLINE __attribute__((always_inline)) +#endif + typedef struct test_spec_pubkey_key { const char *secret; @@ -191,7 +195,7 @@ show_mac_not_available (int algo) -void +static void progress_handler (void *cb_data, const char *what, int printchar, int current, int total) { @@ -207,6 +211,239 @@ progress_handler (void *cb_data, const char *what, int printchar, fflush (stdout); } + +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define CLUTTER_VECTOR_REGISTER_AMD64 1 +# define CLUTTER_VECTOR_REGISTER_COUNT 16 +#elif defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ >= 4 && \ + defined(HAVE_GCC_INLINE_ASM_SSSE3) +# define CLUTTER_VECTOR_REGISTER_I386 1 +# define CLUTTER_VECTOR_REGISTER_COUNT 8 +#elif defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON) && \ + (defined(__ARM_FEATURE_SIMD32) || defined(__ARM_NEON)) +# define CLUTTER_VECTOR_REGISTER_AARCH64 1 +# define CLUTTER_VECTOR_REGISTER_COUNT 32 +#elif 
defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_NEON) && \ + (defined(__ARM_FEATURE_SIMD32) || defined(__ARM_NEON)) +# define CLUTTER_VECTOR_REGISTER_NEON 1 +# define CLUTTER_VECTOR_REGISTER_COUNT 16 +#endif + + +#ifdef CLUTTER_VECTOR_REGISTER_COUNT +static void +prepare_vector_data(unsigned char data[CLUTTER_VECTOR_REGISTER_COUNT][16]) +{ + static unsigned char basedata[16] = + { + 0xd7, 0xfe, 0x5c, 0x4b, 0x58, 0xfe, 0xf4, 0xb6, + 0xed, 0x2f, 0x31, 0xc9, 0x1d, 0xd3, 0x62, 0x8d + }; + int j, i; + + for (i = 0; i < CLUTTER_VECTOR_REGISTER_COUNT; i++) + { + for (j = 0; j < 16; j++) + { + data[i][j] = basedata[(i + j) % 16]; + } + + for (j = 0; j < 16; j++) + { + basedata[j] -= j; + } + } +} +#endif + + +static inline ALWAYS_INLINE void +clutter_vector_registers(void) +{ +#ifdef CLUTTER_VECTOR_REGISTER_COUNT + unsigned char data[CLUTTER_VECTOR_REGISTER_COUNT][16]; +#if defined(CLUTTER_VECTOR_REGISTER_AARCH64) || \ + defined(CLUTTER_VECTOR_REGISTER_NEON) + static int init; + static int have_neon; + + if (!init) + { + char *string; + + string = gcry_get_config (0, "hwflist"); + if (string) + { + have_neon = (strstr(string, "arm-neon:") != NULL); + xfree(string); + } + init = 1; + } + + if (!have_neon) + return; +#elif defined(CLUTTER_VECTOR_REGISTER_I386) + static int init; + static int have_ssse3; + + if (!init) + { + char *string; + + string = gcry_get_config (0, "hwflist"); + if (string) + { + have_ssse3 = (strstr(string, "intel-ssse3:") != NULL); + xfree(string); + } + init = 1; + } + + if (!have_ssse3) + return; +#endif + + prepare_vector_data(data); + +#if defined(CLUTTER_VECTOR_REGISTER_AMD64) + asm volatile("movdqu %[data0], %%xmm0\n" + "movdqu %[data1], %%xmm1\n" + "movdqu %[data2], %%xmm2\n" + "movdqu %[data3], %%xmm3\n" + "movdqu %[data4], %%xmm4\n" + "movdqu %[data5], %%xmm5\n" + "movdqu %[data6], %%xmm6\n" + "movdqu %[data7], %%xmm7\n" + "movdqu %[data8], %%xmm8\n" + "movdqu %[data9], %%xmm9\n" + "movdqu %[data10], %%xmm10\n" 
+ "movdqu %[data11], %%xmm11\n" + "movdqu %[data12], %%xmm12\n" + "movdqu %[data13], %%xmm13\n" + "movdqu %[data14], %%xmm14\n" + "movdqu %[data15], %%xmm15\n" + : + : [data0] "m" (*data[0]), + [data1] "m" (*data[1]), + [data2] "m" (*data[2]), + [data3] "m" (*data[3]), + [data4] "m" (*data[4]), + [data5] "m" (*data[5]), + [data6] "m" (*data[6]), + [data7] "m" (*data[7]), + [data8] "m" (*data[8]), + [data9] "m" (*data[9]), + [data10] "m" (*data[10]), + [data11] "m" (*data[11]), + [data12] "m" (*data[12]), + [data13] "m" (*data[13]), + [data14] "m" (*data[14]), + [data15] "m" (*data[15]) + : "memory" +#ifdef __SSE2__ + ,"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", + "xmm15" +#endif + ); +#elif defined(CLUTTER_VECTOR_REGISTER_I386) + asm volatile("movdqu %[data0], %%xmm0\n" + "movdqu %[data1], %%xmm1\n" + "movdqu %[data2], %%xmm2\n" + "movdqu %[data3], %%xmm3\n" + "movdqu %[data4], %%xmm4\n" + "movdqu %[data5], %%xmm5\n" + "movdqu %[data6], %%xmm6\n" + "movdqu %[data7], %%xmm7\n" + : + : [data0] "m" (*data[0]), + [data1] "m" (*data[1]), + [data2] "m" (*data[2]), + [data3] "m" (*data[3]), + [data4] "m" (*data[4]), + [data5] "m" (*data[5]), + [data6] "m" (*data[6]), + [data7] "m" (*data[7]) + : "memory" +#ifdef __SSE2__ + ,"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" +#endif + ); +#elif defined(CLUTTER_VECTOR_REGISTER_AARCH64) + asm volatile("mov x0, %[ptr]\n" + "ld1 {v0.16b}, [x0], #16\n" + "ld1 {v1.16b}, [x0], #16\n" + "ld1 {v2.16b}, [x0], #16\n" + "ld1 {v3.16b}, [x0], #16\n" + "ld1 {v4.16b}, [x0], #16\n" + "ld1 {v5.16b}, [x0], #16\n" + "ld1 {v6.16b}, [x0], #16\n" + "ld1 {v7.16b}, [x0], #16\n" + "ld1 {v8.16b}, [x0], #16\n" + "ld1 {v9.16b}, [x0], #16\n" + "ld1 {v10.16b}, [x0], #16\n" + "ld1 {v11.16b}, [x0], #16\n" + "ld1 {v12.16b}, [x0], #16\n" + "ld1 {v13.16b}, [x0], #16\n" + "ld1 {v14.16b}, [x0], #16\n" + "ld1 {v15.16b}, [x0], #16\n" + "ld1 {v16.16b}, [x0], #16\n" + 
"ld1 {v17.16b}, [x0], #16\n" + "ld1 {v18.16b}, [x0], #16\n" + "ld1 {v19.16b}, [x0], #16\n" + "ld1 {v20.16b}, [x0], #16\n" + "ld1 {v21.16b}, [x0], #16\n" + "ld1 {v22.16b}, [x0], #16\n" + "ld1 {v23.16b}, [x0], #16\n" + "ld1 {v24.16b}, [x0], #16\n" + "ld1 {v25.16b}, [x0], #16\n" + "ld1 {v26.16b}, [x0], #16\n" + "ld1 {v27.16b}, [x0], #16\n" + "ld1 {v28.16b}, [x0], #16\n" + "ld1 {v29.16b}, [x0], #16\n" + "ld1 {v30.16b}, [x0], #16\n" + "ld1 {v31.16b}, [x0], #16\n" + : + : [ptr] "r" (data) + : "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", + "memory"); +#elif defined(CLUTTER_VECTOR_REGISTER_NEON) + asm volatile("mov r0, %[ptr]\n" + "vld1.64 {q0}, [r0]!\n" + "vld1.64 {q1}, [r0]!\n" + "vld1.64 {q2}, [r0]!\n" + "vld1.64 {q3}, [r0]!\n" + "vld1.64 {q4}, [r0]!\n" + "vld1.64 {q5}, [r0]!\n" + "vld1.64 {q6}, [r0]!\n" + "vld1.64 {q7}, [r0]!\n" + "vld1.64 {q8}, [r0]!\n" + "vld1.64 {q9}, [r0]!\n" + "vld1.64 {q10}, [r0]!\n" + "vld1.64 {q11}, [r0]!\n" + "vld1.64 {q12}, [r0]!\n" + "vld1.64 {q13}, [r0]!\n" + "vld1.64 {q14}, [r0]!\n" + "vld1.64 {q15}, [r0]!\n" + : + : [ptr] "r" (data) + : "r0", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", + "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", + "memory"); +#endif + +#endif /* CLUTTER_VECTOR_REGISTER_COUNT */ +} + + + static void check_cbc_mac_cipher (void) { @@ -8280,7 +8517,9 @@ check_bulk_cipher_modes (void) goto leave; } + clutter_vector_registers(); err = gcry_cipher_setkey (hde, tv[i].key, tv[i].keylen); + clutter_vector_registers(); if (!err) err = gcry_cipher_setkey (hdd, tv[i].key, tv[i].keylen); if (err) @@ -8296,7 +8535,9 @@ check_bulk_cipher_modes (void) goto leave; } + clutter_vector_registers(); err = gcry_cipher_setiv (hde, tv[i].iv, tv[i].ivlen); + clutter_vector_registers(); if (!err) err = gcry_cipher_setiv (hdd, tv[i].iv, tv[i].ivlen); if 
(err) @@ -8309,6 +8550,7 @@ check_bulk_cipher_modes (void) for (j=0; j < buflen; j++) buffer[j] = ((j & 0xff) ^ ((j >> 8) & 0xff)); + clutter_vector_registers(); err = gcry_cipher_encrypt (hde, outbuf, buflen, buffer, buflen); if (err) { @@ -8330,6 +8572,7 @@ check_bulk_cipher_modes (void) fail ("encrypt mismatch (algo %d, mode %d)\n", tv[i].algo, tv[i].mode); + clutter_vector_registers(); err = gcry_cipher_decrypt (hdd, outbuf, buflen, NULL, 0); if (err) { @@ -8409,6 +8652,7 @@ check_one_cipher_core_reset (gcry_cipher_hd_t hd, int algo, int mode, int pass, if (mode == GCRY_CIPHER_MODE_OCB || mode == GCRY_CIPHER_MODE_CCM) { + clutter_vector_registers(); err = gcry_cipher_setiv (hd, iv, sizeof(iv)); if (err) { @@ -8535,6 +8779,7 @@ check_one_cipher_core (int algo, int mode, int flags, goto err_out_free; } + clutter_vector_registers(); err = gcry_cipher_setkey (hd, key, keylen); if (err) { @@ -8547,6 +8792,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (check_one_cipher_core_reset (hd, algo, mode, pass, nplain) < 0) goto err_out_free; + clutter_vector_registers(); err = gcry_cipher_encrypt (hd, out, nplain, plain, nplain); if (err) { @@ -8558,6 +8804,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (taglen > 0) { + clutter_vector_registers(); err = gcry_cipher_gettag (hd, tag, taglen); if (err) { @@ -8575,6 +8822,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (check_one_cipher_core_reset (hd, algo, mode, pass, nplain) < 0) goto err_out_free; + clutter_vector_registers(); err = gcry_cipher_decrypt (hd, in, nplain, out, nplain); if (err) { @@ -8586,6 +8834,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (taglen > 0) { + clutter_vector_registers(); err = gcry_cipher_checktag (hd, tag_result, taglen); if (err) { @@ -8605,6 +8854,7 @@ check_one_cipher_core (int algo, int mode, int flags, goto err_out_free; memcpy (out, plain, nplain); + clutter_vector_registers(); err = gcry_cipher_encrypt (hd, out, nplain, NULL, 
0); if (err) { @@ -8639,6 +8889,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (check_one_cipher_core_reset (hd, algo, mode, pass, nplain) < 0) goto err_out_free; + clutter_vector_registers(); err = gcry_cipher_decrypt (hd, out, nplain, NULL, 0); if (err) { @@ -8651,6 +8902,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (taglen > 0) { + clutter_vector_registers(); err = gcry_cipher_checktag (hd, tag_result, taglen); if (err) { @@ -8677,6 +8929,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (piecelen > nplain - pos) piecelen = nplain - pos; + clutter_vector_registers(); err = gcry_cipher_encrypt (hd, out + pos, piecelen, plain + pos, piecelen); if (err) @@ -8694,6 +8947,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (taglen > 0) { + clutter_vector_registers(); err = gcry_cipher_gettag (hd, tag, taglen); if (err) { @@ -8723,6 +8977,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (piecelen > nplain - pos) piecelen = nplain - pos; + clutter_vector_registers(); err = gcry_cipher_decrypt (hd, in + pos, piecelen, out + pos, piecelen); if (err) { @@ -8739,6 +8994,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (taglen > 0) { + clutter_vector_registers(); err = gcry_cipher_checktag (hd, tag_result, taglen); if (err) { @@ -8767,6 +9023,7 @@ check_one_cipher_core (int algo, int mode, int flags, piecelen = nplain - pos; memcpy (out + pos, plain + pos, piecelen); + clutter_vector_registers(); err = gcry_cipher_encrypt (hd, out + pos, piecelen, NULL, 0); if (err) { @@ -8795,6 +9052,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (piecelen > nplain - pos) piecelen = nplain - pos; + clutter_vector_registers(); err = gcry_cipher_decrypt (hd, out + pos, piecelen, NULL, 0); if (err) { @@ -9104,6 +9362,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, if (key && klen) { + clutter_vector_registers(); err = gcry_md_setkey (hd, key, klen); if (err) { 
@@ -9131,6 +9390,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, if (key && klen) { + clutter_vector_registers(); err = gcry_md_setkey (hd2, key, klen); if (err) { @@ -9149,10 +9409,12 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, gcry_md_reset (hd); gcry_md_reset (hd2); + clutter_vector_registers(); gcry_md_write (hd, buf, i); for (j = 0; j < i; j++) gcry_md_write (hd2, &buf[j], 1); + clutter_vector_registers(); p1 = gcry_md_read (hd, algo); p2 = gcry_md_read (hd2, algo); if (memcmp (p1, p2, mdlen)) @@ -9196,6 +9458,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, if (*data == '?') fillbuf_count(aaa, piecelen, 1000 * 1000 - left); + clutter_vector_registers(); gcry_md_write (hd, aaa, piecelen); left -= piecelen; @@ -9212,6 +9475,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, if (*data == '?') fillbuf_count(aaa, piecelen, 1000 * 1000 - left); + clutter_vector_registers(); gcry_md_write (hd, aaa, piecelen); left -= piecelen; @@ -9223,8 +9487,12 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, } } else - gcry_md_write (hd, data, len); + { + clutter_vector_registers(); + gcry_md_write (hd, data, len); + } + clutter_vector_registers(); err = gcry_md_copy (&hd2, hd); if (err) { @@ -9235,6 +9503,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, if (!xof) { + clutter_vector_registers(); p = gcry_md_read (hd2, algo); if (memcmp (p, expect, mdlen)) @@ -9255,12 +9524,14 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, char buf[1000]; int outmax = sizeof(buf) > elen ? 
elen : sizeof(buf); + clutter_vector_registers(); err = gcry_md_copy (&hd, hd2); if (err) { fail ("algo %d, gcry_md_copy failed: %s\n", algo, gpg_strerror (err)); } + clutter_vector_registers(); err = gcry_md_extract(hd2, algo, buf, outmax); if (err) { @@ -9283,6 +9554,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, memset(buf, 0, sizeof(buf)); /* Extract one byte at time. */ + clutter_vector_registers(); for (i = 0; i < outmax && !err; i++) err = gcry_md_extract(hd, algo, &buf[i], 1); if (err) @@ -9334,6 +9606,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, /* Extract large chucks, total 1000000 additional bytes. */ for (i = 0; i < 1000; i++) { + clutter_vector_registers(); err = gcry_md_extract(hd, algo, buf, 1000); if (!err) gcry_md_write(crc1, buf, 1000); @@ -9356,6 +9629,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, if (piecelen > left) piecelen = left; + clutter_vector_registers(); err = gcry_md_extract (hd2, algo, buf, piecelen); if (!err) gcry_md_write(crc2, buf, piecelen); @@ -9373,7 +9647,9 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, piecelen = piecelen * 2 - ((piecelen != startlen) ? 
startlen : 0); } + clutter_vector_registers(); p1 = gcry_md_read (crc1, crcalgo); + clutter_vector_registers(); p2 = gcry_md_read (crc2, crcalgo); if (memcmp (p1, p2, crclen)) @@ -9449,6 +9725,7 @@ check_one_md_multi (int algo, const char *data, int len, const char *expect) iovcnt++; assert (iovcnt <= DIM (iov)); + clutter_vector_registers(); err = gcry_md_hash_buffers (algo, 0, digest, iov, iovcnt); if (err) { @@ -9498,6 +9775,7 @@ check_one_md_final(int algo, const char *expect, unsigned int expectlen) for (i = 0; i < sizeof(inbuf); i++) inbuf[i] = i; + clutter_vector_registers(); gcry_md_hash_buffer (algo, xorbuf, NULL, 0); for (i = 1; i < sizeof(inbuf); i++) { @@ -11336,6 +11614,7 @@ check_one_mac (int algo, const char *data, int datalen, return; } + clutter_vector_registers(); err = gcry_mac_setkey (hd, key, keylen); if (err) fail("algo %d, mac gcry_mac_setkey failed: %s\n", algo, gpg_strerror (err)); @@ -11344,6 +11623,7 @@ check_one_mac (int algo, const char *data, int datalen, if (ivlen && iv) { + clutter_vector_registers(); err = gcry_mac_setiv (hd, iv, ivlen); if (err) fail("algo %d, mac gcry_mac_ivkey failed: %s\n", algo, @@ -11356,6 +11636,7 @@ check_one_mac (int algo, const char *data, int datalen, { for (i = 0; i < datalen; i++) { + clutter_vector_registers(); err = gcry_mac_write (hd, &data[i], 1); if (err) fail("algo %d, mac gcry_mac_write [buf-offset: %d] failed: %s\n", @@ -11389,6 +11670,7 @@ check_one_mac (int algo, const char *data, int datalen, if (*data == '?') fillbuf_count(aaa, piecelen, 1000 * 1000 - left); + clutter_vector_registers(); gcry_mac_write (hd, aaa, piecelen); left -= piecelen; @@ -11405,6 +11687,7 @@ check_one_mac (int algo, const char *data, int datalen, if (*data == '?') fillbuf_count(aaa, piecelen, 1000 * 1000 - left); + clutter_vector_registers(); gcry_mac_write (hd, aaa, piecelen); left -= piecelen; @@ -11417,6 +11700,7 @@ check_one_mac (int algo, const char *data, int datalen, } else { + clutter_vector_registers(); err = 
gcry_mac_write (hd, data, datalen); } @@ -11426,11 +11710,13 @@ check_one_mac (int algo, const char *data, int datalen, goto out; } + clutter_vector_registers(); err = gcry_mac_verify (hd, expect, maclen); if (err) fail("algo %d, mac gcry_mac_verify failed: %s\n", algo, gpg_strerror (err)); macoutlen = maclen; + clutter_vector_registers(); err = gcry_mac_read (hd, p, &macoutlen); if (err) fail("algo %d, mac gcry_mac_read failed: %s\n", algo, gpg_strerror (err)); From 8ebbd8545a209e78ea4fc0cea9dc2e4a3acd9be2 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Wed, 22 Jan 2020 19:39:41 +0200 Subject: [PATCH 17/27] Register DCO for H.J. Lu -- Signed-off-by: Jussi Kivilinna --- AUTHORS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/AUTHORS b/AUTHORS index 8271c0d8..b72992f8 100644 --- a/AUTHORS +++ b/AUTHORS @@ -157,6 +157,9 @@ Dmitry Eremin-Solenikov Dmitry Kasatkin 2012-12-14:50CAE2DB.80302@intel.com: +H.J. Lu +2020-01-19:20200119135241.GA4970@gmail.com: + Jia Zhang 2017-10-17:59E56E30.9060503@alibaba-inc.com: From 4c88c2bd2a418435506325cd53246acaaa52750c Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Sun, 19 Jan 2020 06:40:22 -0800 Subject: [PATCH 18/27] x86: Add .note.gnu.property section for Intel CET * configure.ac: Include in for assembly codes. -- When Intel CET is enabled, include in for assembly codes to mark Intel CET support. Signed-off-by: H.J. Lu --- configure.ac | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/configure.ac b/configure.ac index 74ca9be0..f31b7558 100644 --- a/configure.ac +++ b/configure.ac @@ -97,6 +97,12 @@ AH_TOP([ AH_BOTTOM([ #define _GCRYPT_IN_LIBGCRYPT 1 +/* Add .note.gnu.property section for Intel CET in assembler sources + when CET is enabled. */ +#if defined(__ASSEMBLER__) && defined(__CET__) +# include +#endif + /* If the configure check for endianness has been disabled, get it from OS macros. This is intended for making fat binary builds on OS X. 
*/ #ifdef DISABLED_ENDIAN_CHECK From 24b4d5c10a97aaf82ac7402cc3a5b429d580cd66 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Sun, 19 Jan 2020 06:40:23 -0800 Subject: [PATCH 19/27] mpi: Add .note.gnu.property section for Intel CET * mpi/config.links: Include in . -- When Intel CET is enabled, include in for assembly codes to mark Intel CET support. Signed-off-by: H.J. Lu --- mpi/config.links | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mpi/config.links b/mpi/config.links index 3ead4f08..4f43b732 100644 --- a/mpi/config.links +++ b/mpi/config.links @@ -382,6 +382,16 @@ if test x"$mpi_cpu_arch" = x ; then mpi_cpu_arch="unknown" fi +# Add .note.gnu.property section for Intel CET in assembler sources +# when CET is enabled. */ +if test x"$mpi_cpu_arch" = xx86 ; then + cat <> ./mpi/asm-syntax.h + +#if defined(__ASSEMBLER__) && defined(__CET__) +# include +#endif +EOF +fi # Make sysdep.h echo '/* created by config.links - do not edit */' >./mpi/sysdep.h From 22e577071790834f07753c42a191a568c9f2644d Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Sun, 19 Jan 2020 06:40:24 -0800 Subject: [PATCH 20/27] amd64: Always include in cipher assembly codes * cipher/camellia-aesni-avx-amd64.S: Always include . * cipher/camellia-aesni-avx2-amd64.S: Likewise. * cipher/serpent-avx2-amd64.S: Likewise. -- When Intel CET is enabled, we need to include in assembly codes to mark Intel CET support even if it is empty. We should always include in cipher amd64 assembly codes so that they will be marked for Intel CET support when compiling for i686. Signed-off-by: H.J. 
Lu --- cipher/camellia-aesni-avx-amd64.S | 3 ++- cipher/camellia-aesni-avx2-amd64.S | 3 ++- cipher/serpent-avx2-amd64.S | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/cipher/camellia-aesni-avx-amd64.S b/cipher/camellia-aesni-avx-amd64.S index e16d4f61..4671bcfe 100644 --- a/cipher/camellia-aesni-avx-amd64.S +++ b/cipher/camellia-aesni-avx-amd64.S @@ -18,8 +18,9 @@ * License along with this program; if not, see . */ -#ifdef __x86_64 #include + +#ifdef __x86_64 #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT) diff --git a/cipher/camellia-aesni-avx2-amd64.S b/cipher/camellia-aesni-avx2-amd64.S index cc01c774..517e6880 100644 --- a/cipher/camellia-aesni-avx2-amd64.S +++ b/cipher/camellia-aesni-avx2-amd64.S @@ -18,8 +18,9 @@ * License along with this program; if not, see . */ -#ifdef __x86_64 #include + +#ifdef __x86_64 #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) diff --git a/cipher/serpent-avx2-amd64.S b/cipher/serpent-avx2-amd64.S index 9b17c2bd..dcee9b62 100644 --- a/cipher/serpent-avx2-amd64.S +++ b/cipher/serpent-avx2-amd64.S @@ -18,8 +18,9 @@ * License along with this program; if not, see . */ -#ifdef __x86_64 #include + +#ifdef __x86_64 #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT) && \ defined(ENABLE_AVX2_SUPPORT) From cb9f0a2df8225eed071ae0a56265e38e9f6ff184 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Sun, 19 Jan 2020 06:40:25 -0800 Subject: [PATCH 21/27] i386: Add _CET_ENDBR to indirect jump targets * mpi/i386/mpih-add1.S (_gcry_mpih_add_n): Save and restore %ebx if IBT is enabled. Add _CET_ENDBR to indirect jump targets and adjust jump destination for _CET_ENDBR.
* mpi/i386/mpih-sub1.S (_gcry_mpih_sub_n): Likewise. -- i386 mpih-add1.S and mpih-sub1.S use a trick to implement jump tables with LEA. We can't use conditional branches nor normal jump tables since jump table entries use EFLAGS set by jump table index. This patch adds _CET_ENDBR to indirect jump targets and adjusts the destination for _CET_ENDBR. Signed-off-by: H.J. Lu --- mpi/i386/mpih-add1.S | 37 +++++++++++++++++++++++++++++++++++++ mpi/i386/mpih-sub1.S | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/mpi/i386/mpih-add1.S b/mpi/i386/mpih-add1.S index 32091f34..2f1ae931 100644 --- a/mpi/i386/mpih-add1.S +++ b/mpi/i386/mpih-add1.S @@ -55,6 +55,11 @@ C_SYMBOL_NAME(_gcry_mpih_add_n:) movl 20(%esp),%edx /* s2_ptr */ movl 24(%esp),%ecx /* size */ +#if defined __CET__ && (__CET__ & 1) != 0 + pushl %ebx + CFI_PUSH(%ebx) +#endif + movl %ecx,%eax shrl $3,%ecx /* compute count for unrolled loop */ negl %eax @@ -66,6 +71,9 @@ C_SYMBOL_NAME(_gcry_mpih_add_n:) subl %eax,%esi /* ... by a constant when we ... */ subl %eax,%edx /* ... enter the loop */ shrl $2,%eax /* restore previous value */ +#if defined __CET__ && (__CET__ & 1) != 0 + leal -4(,%eax,4),%ebx /* Count for 4-byte endbr32 */ +#endif #ifdef PIC /* Calculate start address in loop for PIC. Due to limitations in some assemblers, Loop-L0-3 cannot be put into the leal */ @@ -77,30 +85,54 @@ L0: leal (%eax,%eax,8),%eax #else /* Calculate start address in loop for non-PIC.
*/ leal (Loop - 3)(%eax,%eax,8),%eax +#endif +#if defined __CET__ && (__CET__ & 1) != 0 + addl %ebx,%eax /* Adjust for endbr32 */ #endif jmp *%eax /* jump into loop */ ALIGN (3) Loop: movl (%esi),%eax adcl (%edx),%eax movl %eax,(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 4(%esi),%eax adcl 4(%edx),%eax movl %eax,4(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 8(%esi),%eax adcl 8(%edx),%eax movl %eax,8(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 12(%esi),%eax adcl 12(%edx),%eax movl %eax,12(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 16(%esi),%eax adcl 16(%edx),%eax movl %eax,16(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 20(%esi),%eax adcl 20(%edx),%eax movl %eax,20(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 24(%esi),%eax adcl 24(%edx),%eax movl %eax,24(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 28(%esi),%eax adcl 28(%edx),%eax movl %eax,28(%edi) @@ -113,6 +145,11 @@ Loop: movl (%esi),%eax sbbl %eax,%eax negl %eax +#if defined __CET__ && (__CET__ & 1) != 0 + popl %ebx + CFI_POP(%ebx) +#endif + popl %esi CFI_POP(%esi) popl %edi diff --git a/mpi/i386/mpih-sub1.S b/mpi/i386/mpih-sub1.S index 501c4a9f..01e977e5 100644 --- a/mpi/i386/mpih-sub1.S +++ b/mpi/i386/mpih-sub1.S @@ -56,6 +56,11 @@ C_SYMBOL_NAME(_gcry_mpih_sub_n:) movl 20(%esp),%edx /* s2_ptr */ movl 24(%esp),%ecx /* size */ +#if defined __CET__ && (__CET__ & 1) != 0 + pushl %ebx + CFI_PUSH(%ebx) +#endif + movl %ecx,%eax shrl $3,%ecx /* compute count for unrolled loop */ negl %eax @@ -67,6 +72,9 @@ C_SYMBOL_NAME(_gcry_mpih_sub_n:) subl %eax,%esi /* ... by a constant when we ... */ subl %eax,%edx /* ... enter the loop */ shrl $2,%eax /* restore previous value */ +#if defined __CET__ && (__CET__ & 1) != 0 + leal -4(,%eax,4),%ebx /* Count for 4-byte endbr32 */ +#endif #ifdef PIC /* Calculate start address in loop for PIC. 
Due to limitations in some assemblers, Loop-L0-3 cannot be put into the leal */ @@ -78,30 +86,54 @@ L0: leal (%eax,%eax,8),%eax #else /* Calculate start address in loop for non-PIC. */ leal (Loop - 3)(%eax,%eax,8),%eax +#endif +#if defined __CET__ && (__CET__ & 1) != 0 + addl %ebx,%eax /* Adjust for endbr32 */ #endif jmp *%eax /* jump into loop */ ALIGN (3) Loop: movl (%esi),%eax sbbl (%edx),%eax movl %eax,(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 4(%esi),%eax sbbl 4(%edx),%eax movl %eax,4(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 8(%esi),%eax sbbl 8(%edx),%eax movl %eax,8(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 12(%esi),%eax sbbl 12(%edx),%eax movl %eax,12(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 16(%esi),%eax sbbl 16(%edx),%eax movl %eax,16(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 20(%esi),%eax sbbl 20(%edx),%eax movl %eax,20(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 24(%esi),%eax sbbl 24(%edx),%eax movl %eax,24(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 28(%esi),%eax sbbl 28(%edx),%eax movl %eax,28(%edi) @@ -114,6 +146,11 @@ Loop: movl (%esi),%eax sbbl %eax,%eax negl %eax +#if defined __CET__ && (__CET__ & 1) != 0 + popl %ebx + CFI_POP(%ebx) +#endif + popl %esi CFI_POP(%esi) popl %edi From 5f098f7e6ceb899ac27a0a30ee036de5f1be4e3d Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Wed, 22 Jan 2020 21:31:49 +0200 Subject: [PATCH 22/27] mpi/i386: fix DWARF CFI for _gcry_mpih_sub_n and _gcry_mpih_add_n * mpi/i386/mpih-add1.S (_gcry_mpih_add_n) [PIC]: Adjust CFI CFA offset when making call and restoring stack pointer. * mpi/i386/mpih-sub1.S (_gcry_mpih_sub_n) [PIC]: Ditto. 
-- Signed-off-by: Jussi Kivilinna --- mpi/i386/mpih-add1.S | 2 ++ mpi/i386/mpih-sub1.S | 2 ++ 2 files changed, 4 insertions(+) diff --git a/mpi/i386/mpih-add1.S b/mpi/i386/mpih-add1.S index 2f1ae931..de78a0cb 100644 --- a/mpi/i386/mpih-add1.S +++ b/mpi/i386/mpih-add1.S @@ -78,10 +78,12 @@ C_SYMBOL_NAME(_gcry_mpih_add_n:) /* Calculate start address in loop for PIC. Due to limitations in some assemblers, Loop-L0-3 cannot be put into the leal */ call L0 + CFI_ADJUST_CFA_OFFSET(4) L0: leal (%eax,%eax,8),%eax addl (%esp),%eax addl $(Loop-L0-3),%eax addl $4,%esp + CFI_ADJUST_CFA_OFFSET(-4) #else /* Calculate start address in loop for non-PIC. */ leal (Loop - 3)(%eax,%eax,8),%eax diff --git a/mpi/i386/mpih-sub1.S b/mpi/i386/mpih-sub1.S index 01e977e5..2bdc1438 100644 --- a/mpi/i386/mpih-sub1.S +++ b/mpi/i386/mpih-sub1.S @@ -79,10 +79,12 @@ C_SYMBOL_NAME(_gcry_mpih_sub_n:) /* Calculate start address in loop for PIC. Due to limitations in some assemblers, Loop-L0-3 cannot be put into the leal */ call L0 + CFI_ADJUST_CFA_OFFSET(4) L0: leal (%eax,%eax,8),%eax addl (%esp),%eax addl $(Loop-L0-3),%eax addl $4,%esp + CFI_ADJUST_CFA_OFFSET(-4) #else /* Calculate start address in loop for non-PIC. */ leal (Loop - 3)(%eax,%eax,8),%eax From 8b31091da092e22dba78b2402c2f436bbffc1c73 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Wed, 22 Jan 2020 22:31:12 +0200 Subject: [PATCH 23/27] sexp: fix cast from 'int' pointer to 'size_t' pointer * src/sexp.c (do_vsexp_sscan): Change 'datalen' from 'int' to 'size_t'; Remove &datalen pointer cast to 'size_t *' type. 
-- Signed-off-by: Jussi Kivilinna --- src/sexp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sexp.c b/src/sexp.c index 57d77d29..17341ba5 100644 --- a/src/sexp.c +++ b/src/sexp.c @@ -1114,7 +1114,7 @@ do_vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff, int hexcount = 0; int b64count = 0; int quoted_esc = 0; - int datalen = 0; + size_t datalen = 0; size_t dummy_erroff; struct make_space_ctx c; int arg_counter = 0; @@ -1354,7 +1354,7 @@ do_vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff, goto leave; } err = gpgrt_b64dec_proc (b64state, b64buf, b64count, - (size_t *)&datalen); + &datalen); if (err && gpg_err_code (err) != GPG_ERR_EOF) { xfree (b64state); From e0898d0628789414da23e0526c87df1885c8b3ae Mon Sep 17 00:00:00 2001 From: NIIBE Yutaka Date: Thu, 23 Jan 2020 10:29:10 +0900 Subject: [PATCH 24/27] random: Fix include of config.h. * random/random-drbg.c: Include config.h earlier. -- GnuPG-bug-id: 4818 Reported-by: Bruno Haible Signed-off-by: NIIBE Yutaka --- random/random-drbg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/random/random-drbg.c b/random/random-drbg.c index e0b4230e..6124f5fb 100644 --- a/random/random-drbg.c +++ b/random/random-drbg.c @@ -146,12 +146,12 @@ * gcry_randomize(outbuf, OUTLEN, GCRY_STRONG_RANDOM); */ +#include + #include #include #include -#include - #include "g10lib.h" #include "random.h" #include "rand-internal.h" From e5b68ea8a94a11126970723cf2d385039d239c63 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 30 Jan 2020 19:51:44 +0300 Subject: [PATCH 25/27] gost: add keymeshing support per RFC 4357 * cipher/gost-s-box.c: output whether we should enable or disable keymeshing for a particular parameters set. 
Signed-off-by: Dmitry Baryshkov --- cipher/gost-s-box.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/cipher/gost-s-box.c b/cipher/gost-s-box.c index 7aa54447..1260227b 100644 --- a/cipher/gost-s-box.c +++ b/cipher/gost-s-box.c @@ -26,9 +26,11 @@ struct gost_sbox { const char *name; const char *oid; + unsigned int keymeshing; unsigned char sbox[16*8]; } gost_sboxes[] = { - { "test_3411", "1.2.643.2.2.30.0", { + { "test_3411", "1.2.643.2.2.30.0", 0, + { 0x4, 0xE, 0x5, 0x7, 0x6, 0x4, 0xD, 0x1, 0xA, 0xB, 0x8, 0xD, 0xC, 0xB, 0xB, 0xF, 0x9, 0x4, 0x1, 0xA, 0x7, 0xA, 0x4, 0xD, @@ -50,7 +52,8 @@ struct gost_sbox 0x3, 0x9, 0xB, 0x3, 0x2, 0xE, 0xC, 0xC, } }, - { "CryptoPro_3411", "1.2.643.2.2.30.1", { + { "CryptoPro_3411", "1.2.643.2.2.30.1", 0, + { 0xA, 0x5, 0x7, 0x4, 0x7, 0x7, 0xD, 0x1, 0x4, 0xF, 0xF, 0xA, 0x6, 0x6, 0xE, 0x3, 0x5, 0x4, 0xC, 0x7, 0x4, 0x2, 0x4, 0xA, @@ -72,7 +75,8 @@ struct gost_sbox 0xF, 0x8, 0xD, 0x3, 0x5, 0x3, 0xB, 0xC, } }, - { "Test_89", "1.2.643.2.2.31.0", { + { "Test_89", "1.2.643.2.2.31.0", 0, + { 0x4, 0xC, 0xD, 0xE, 0x3, 0x8, 0x9, 0xC, 0x2, 0x9, 0x8, 0x9, 0xE, 0xF, 0xB, 0x6, 0xF, 0xF, 0xE, 0xB, 0x5, 0x6, 0xC, 0x5, @@ -94,7 +98,8 @@ struct gost_sbox 0x6, 0x5, 0xB, 0x8, 0x4, 0x4, 0xD, 0x8, } }, - { "CryptoPro_A", "1.2.643.2.2.31.1", { + { "CryptoPro_A", "1.2.643.2.2.31.1", 1, + { 0x9, 0x3, 0xE, 0xE, 0xB, 0x3, 0x1, 0xB, 0x6, 0x7, 0x4, 0x7, 0x5, 0xA, 0xD, 0xA, 0x3, 0xE, 0x6, 0xA, 0x1, 0xD, 0x2, 0xF, @@ -116,7 +121,8 @@ struct gost_sbox 0x5, 0x1, 0x9, 0x6, 0x6, 0x6, 0xE, 0x4, } }, - { "CryptoPro_B", "1.2.643.2.2.31.2", { + { "CryptoPro_B", "1.2.643.2.2.31.2", 1, + { 0x8, 0x0, 0xE, 0x7, 0x2, 0x8, 0x5, 0x0, 0x4, 0x1, 0xC, 0x5, 0x7, 0x3, 0x2, 0x4, 0xB, 0x2, 0x0, 0x0, 0xC, 0x2, 0xA, 0xB, @@ -138,7 +144,8 @@ struct gost_sbox 0xF, 0xE, 0x4, 0x8, 0x3, 0x5, 0xE, 0xC, } }, - { "CryptoPro_C", "1.2.643.2.2.31.3", { + { "CryptoPro_C", "1.2.643.2.2.31.3", 1, + { 0x1, 0x0, 0x8, 0x3, 0x8, 0xC, 0xA, 0x7, 0xB, 0x1, 
0x2, 0x6, 0xD, 0x9, 0x9, 0x4, 0xC, 0x7, 0x5, 0x0, 0xB, 0xB, 0x6, 0x0, @@ -160,7 +167,8 @@ struct gost_sbox 0x3, 0x3, 0xB, 0x4, 0x7, 0xD, 0x7, 0x8, } }, - { "CryptoPro_D", "1.2.643.2.2.31.4", { + { "CryptoPro_D", "1.2.643.2.2.31.4", 1, + { 0xF, 0xB, 0x1, 0x1, 0x0, 0x8, 0x3, 0x1, 0xC, 0x6, 0xC, 0x5, 0xC, 0x0, 0x0, 0xA, 0x2, 0x3, 0xB, 0xE, 0x8, 0xF, 0x6, 0x6, @@ -182,7 +190,8 @@ struct gost_sbox 0x3, 0x1, 0x2, 0x8, 0x1, 0x6, 0x7, 0xE, } }, - { "TC26_Z", "1.2.643.7.1.2.5.1.1", { + { "TC26_Z", "1.2.643.7.1.2.5.1.1", 0, + { 0xc, 0x6, 0xb, 0xc, 0x7, 0x5, 0x8, 0x1, 0x4, 0x8, 0x3, 0x8, 0xf, 0xd, 0xe, 0x7, 0x6, 0x2, 0x5, 0x2, 0x5, 0xf, 0x2, 0xe, @@ -242,14 +251,14 @@ int main(int argc, char **argv) fprintf (f, "\n };\n\n"); } - fprintf (f, "static struct\n{\n const char *oid;\n const u32 *sbox;\n} gost_oid_map[] = {\n"); + fprintf (f, "static struct\n{\n const char *oid;\n const u32 *sbox;\n const int keymeshing;\n} gost_oid_map[] = {\n"); for (s = 0; s < DIM(gost_sboxes); s++) { - fprintf (f, " { \"%s\", sbox_%s },\n", gost_sboxes[s].oid, gost_sboxes[s].name ); + fprintf (f, " { \"%s\", sbox_%s, %d },\n", gost_sboxes[s].oid, gost_sboxes[s].name, gost_sboxes[s].keymeshing ); } - fprintf(f, " { NULL, NULL }\n};\n"); + fprintf(f, " { NULL, NULL, 0 }\n};\n"); fclose (f); From 37ce63965239f623c4f8b94e8687a0f3fe7a3402 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 31 Jan 2020 00:55:20 +0300 Subject: [PATCH 26/27] gost28147: add support for CryptoPro key meshing per RFC 4357 Signed-off-by: Dmitry Baryshkov --- cipher/cipher.c | 8 +++- cipher/gost.h | 2 + cipher/gost28147.c | 92 ++++++++++++++++++++++++++++++++++++++++++++-- doc/gcrypt.texi | 7 ++++ src/cipher.h | 1 + src/gcrypt.h.in | 3 +- tests/basic.c | 15 ++++++-- 7 files changed, 118 insertions(+), 10 deletions(-) diff --git a/cipher/cipher.c b/cipher/cipher.c index bd571367..fd9e1927 100644 --- a/cipher/cipher.c +++ b/cipher/cipher.c @@ -83,6 +83,7 @@ static gcry_cipher_spec_t * const cipher_list[] = #endif #if 
USE_GOST28147 &_gcry_cipher_spec_gost28147, + &_gcry_cipher_spec_gost28147_mesh, #endif #if USE_CHACHA20 &_gcry_cipher_spec_chacha20, @@ -193,7 +194,12 @@ static gcry_cipher_spec_t * const cipher_list_algo301[] = NULL, #endif #if USE_CHACHA20 - &_gcry_cipher_spec_chacha20 + &_gcry_cipher_spec_chacha20, +#else + NULL, +#endif +#if USE_GOST28147 + &_gcry_cipher_spec_gost28147_mesh, #else NULL, #endif diff --git a/cipher/gost.h b/cipher/gost.h index 04c2f85e..53a40505 100644 --- a/cipher/gost.h +++ b/cipher/gost.h @@ -23,6 +23,8 @@ typedef struct { u32 key[8]; const u32 *sbox; + unsigned int mesh_counter; + unsigned int mesh_limit; } GOST28147_context; /* This is a simple interface that will be used by GOST R 34.11-94 */ diff --git a/cipher/gost28147.c b/cipher/gost28147.c index f30ca16a..00d72902 100644 --- a/cipher/gost28147.c +++ b/cipher/gost28147.c @@ -38,6 +38,13 @@ #include "gost.h" #include "gost-sb.h" +static void +gost_do_set_sbox (GOST28147_context *ctx, unsigned int index) +{ + ctx->sbox = gost_oid_map[index].sbox; + ctx->mesh_limit = gost_oid_map[index].keymeshing ? 
1024 : 0; +} + static gcry_err_code_t gost_setkey (void *c, const byte *key, unsigned keylen, gcry_cipher_hd_t hd) @@ -51,12 +58,15 @@ gost_setkey (void *c, const byte *key, unsigned keylen, return GPG_ERR_INV_KEYLEN; if (!ctx->sbox) - ctx->sbox = sbox_test_3411; + gost_do_set_sbox (ctx, 0); for (i = 0; i < 8; i++) { ctx->key[i] = buf_get_le32(&key[4*i]); } + + ctx->mesh_counter = 0; + return GPG_ERR_NO_ERROR; } @@ -178,7 +188,7 @@ gost_set_sbox (GOST28147_context *ctx, const char *oid) { if (!strcmp(gost_oid_map[i].oid, oid)) { - ctx->sbox = gost_oid_map[i].sbox; + gost_do_set_sbox (ctx, i); return 0; } } @@ -207,8 +217,67 @@ gost_set_extra_info (void *c, int what, const void *buffer, size_t buflen) return ec; } -static gcry_cipher_oid_spec_t oids_gost28147[] = +static const byte CryptoProKeyMeshingKey[] = { + 0x69, 0x00, 0x72, 0x22, 0x64, 0xC9, 0x04, 0x23, + 0x8D, 0x3A, 0xDB, 0x96, 0x46, 0xE9, 0x2A, 0xC4, + 0x18, 0xFE, 0xAC, 0x94, 0x00, 0xED, 0x07, 0x12, + 0xC0, 0x86, 0xDC, 0xC2, 0xEF, 0x4C, 0xA9, 0x2B +}; + +/* Implements key meshing algorithm by modifing ctx and returning new IV. + Thanks to Dmitry Belyavskiy. 
*/ +static void +cryptopro_key_meshing (GOST28147_context *ctx) +{ + unsigned char newkey[32]; + unsigned int i; + + /* "Decrypt" the static keymeshing key */ + for (i = 0; i < 4; i++) + { + gost_decrypt_block (ctx, newkey + i*8, CryptoProKeyMeshingKey + i*8); + } + + /* Set new key */ + for (i = 0; i < 8; i++) + { + ctx->key[i] = buf_get_le32(&newkey[4*i]); + } + + ctx->mesh_counter = 0; +} + +static unsigned int +gost_encrypt_block_mesh (void *c, byte *outbuf, const byte *inbuf) +{ + GOST28147_context *ctx = c; + u32 n1, n2; + unsigned int burn; + + n1 = buf_get_le32 (inbuf); + n2 = buf_get_le32 (inbuf+4); + + if (ctx->mesh_limit && (ctx->mesh_counter == ctx->mesh_limit)) + { + cryptopro_key_meshing (ctx); + /* Yes, encrypt twice: once for KeyMeshing procedure per RFC 4357, + * once for block encryption */ + _gost_encrypt_data(ctx->sbox, ctx->key, &n1, &n2, n1, n2); + } + + burn = _gost_encrypt_data(ctx->sbox, ctx->key, &n1, &n2, n1, n2); + + ctx->mesh_counter += 8; + + buf_put_le32 (outbuf+0, n1); + buf_put_le32 (outbuf+4, n2); + + return /* burn_stack */ burn + 6*sizeof(void*) /* func call */; +} + +static gcry_cipher_oid_spec_t oids_gost28147_mesh[] = { + { "1.2.643.2.2.21", GCRY_CIPHER_MODE_CFB }, /* { "1.2.643.2.2.31.0", GCRY_CIPHER_MODE_CNTGOST }, */ { "1.2.643.2.2.31.1", GCRY_CIPHER_MODE_CFB }, { "1.2.643.2.2.31.2", GCRY_CIPHER_MODE_CFB }, @@ -220,10 +289,25 @@ static gcry_cipher_oid_spec_t oids_gost28147[] = gcry_cipher_spec_t _gcry_cipher_spec_gost28147 = { GCRY_CIPHER_GOST28147, {0, 0}, - "GOST28147", NULL, oids_gost28147, 8, 256, + "GOST28147", NULL, NULL, 8, 256, sizeof (GOST28147_context), gost_setkey, gost_encrypt_block, gost_decrypt_block, NULL, NULL, NULL, gost_set_extra_info, }; + +/* Meshing is used only for CFB, so no need to have separate + * gost_decrypt_block_mesh. + * Moreover key meshing is specified as encrypting the block (IV). Decrypting + * it afterwards would be meaningless. 
*/ +gcry_cipher_spec_t _gcry_cipher_spec_gost28147_mesh = + { + GCRY_CIPHER_GOST28147_MESH, {0, 0}, + "GOST28147_MESH", NULL, oids_gost28147_mesh, 8, 256, + sizeof (GOST28147_context), + gost_setkey, + gost_encrypt_block_mesh, + gost_decrypt_block, + NULL, NULL, NULL, gost_set_extra_info, + }; diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi index 091704de..8cd6e480 100644 --- a/doc/gcrypt.texi +++ b/doc/gcrypt.texi @@ -1630,6 +1630,13 @@ This is the Salsa20/12 - reduced round version of Salsa20 stream cipher. The GOST 28147-89 cipher, defined in the respective GOST standard. Translation of this GOST into English is provided in the RFC-5830. +@item GCRY_CIPHER_GOST28147_MESH +@cindex GOST 28147-89 CryptoPro keymeshing +The GOST 28147-89 cipher, defined in the respective GOST standard. +Translation of this GOST into English is provided in the RFC-5830. +This cipher will use CryptoPro keymeshing as defined in RFC 4357 +if it has to be used for the selected parameter set. + @item GCRY_CIPHER_CHACHA20 @cindex ChaCha20 This is the ChaCha20 stream cipher. diff --git a/src/cipher.h b/src/cipher.h index 1fe50890..20ccb8c5 100644 --- a/src/cipher.h +++ b/src/cipher.h @@ -300,6 +300,7 @@ extern gcry_cipher_spec_t _gcry_cipher_spec_idea; extern gcry_cipher_spec_t _gcry_cipher_spec_salsa20; extern gcry_cipher_spec_t _gcry_cipher_spec_salsa20r12; extern gcry_cipher_spec_t _gcry_cipher_spec_gost28147; +extern gcry_cipher_spec_t _gcry_cipher_spec_gost28147_mesh; extern gcry_cipher_spec_t _gcry_cipher_spec_chacha20; /* Declarations for the digest specifications. 
*/ diff --git a/src/gcrypt.h.in b/src/gcrypt.h.in index c008f0a6..f38d476c 100644 --- a/src/gcrypt.h.in +++ b/src/gcrypt.h.in @@ -944,7 +944,8 @@ enum gcry_cipher_algos GCRY_CIPHER_SALSA20 = 313, GCRY_CIPHER_SALSA20R12 = 314, GCRY_CIPHER_GOST28147 = 315, - GCRY_CIPHER_CHACHA20 = 316 + GCRY_CIPHER_CHACHA20 = 316, + GCRY_CIPHER_GOST28147_MESH = 317 /* GOST 28147 with optional CryptoPro keymeshing */ }; /* The Rijndael algorithm is basically AES, so provide some macros. */ diff --git a/tests/basic.c b/tests/basic.c index 812bd89d..6ccce4a3 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -7059,7 +7059,7 @@ check_xts_cipher (void) static void -check_gost28147_cipher (void) +check_gost28147_cipher_basic (enum gcry_cipher_algos algo) { #if USE_GOST28147 static const struct { @@ -7136,7 +7136,7 @@ check_gost28147_cipher (void) if (verbose) fprintf (stderr, " Starting GOST28147 cipher checks.\n"); - keylen = gcry_cipher_get_algo_keylen(GCRY_CIPHER_GOST28147); + keylen = gcry_cipher_get_algo_keylen(algo); if (!keylen) { fail ("gost28147, gcry_cipher_get_algo_keylen failed\n"); @@ -7145,10 +7145,10 @@ check_gost28147_cipher (void) for (i = 0; i < sizeof (tv) / sizeof (tv[0]); i++) { - err = gcry_cipher_open (&hde, GCRY_CIPHER_GOST28147, + err = gcry_cipher_open (&hde, algo, GCRY_CIPHER_MODE_ECB, 0); if (!err) - err = gcry_cipher_open (&hdd, GCRY_CIPHER_GOST28147, + err = gcry_cipher_open (&hdd, algo, GCRY_CIPHER_MODE_ECB, 0); if (err) { @@ -7225,6 +7225,12 @@ check_gost28147_cipher (void) #endif } +static void +check_gost28147_cipher (void) +{ + check_gost28147_cipher_basic (GCRY_CIPHER_GOST28147); + check_gost28147_cipher_basic (GCRY_CIPHER_GOST28147_MESH); +} static void check_stream_cipher (void) @@ -9214,6 +9220,7 @@ check_ciphers (void) #endif #if USE_GOST28147 GCRY_CIPHER_GOST28147, + GCRY_CIPHER_GOST28147_MESH, #endif 0 }; From 7d8d4da6100a7d6c6fd740323277297cbfc50f4d Mon Sep 17 00:00:00 2001 From: Paul Wolneykien Date: Thu, 6 Feb 2020 13:59:45 +0300 Subject: [PATCH
27/27] Fix: Switch key meshing on for the TC26_Z S-box Signed-off-by: Paul Wolneykien --- cipher/gost-s-box.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cipher/gost-s-box.c b/cipher/gost-s-box.c index 1260227b..5d5ed7dc 100644 --- a/cipher/gost-s-box.c +++ b/cipher/gost-s-box.c @@ -190,7 +190,7 @@ struct gost_sbox 0x3, 0x1, 0x2, 0x8, 0x1, 0x6, 0x7, 0xE, } }, - { "TC26_Z", "1.2.643.7.1.2.5.1.1", 0, + { "TC26_Z", "1.2.643.7.1.2.5.1.1", 1, { 0xc, 0x6, 0xb, 0xc, 0x7, 0x5, 0x8, 0x1, 0x4, 0x8, 0x3, 0x8, 0xf, 0xd, 0xe, 0x7,