From 5b9ea3df0dc355d77b9f061f63064614a97b8b67 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sun, 22 Dec 2019 16:43:50 +0200 Subject: [PATCH 01/31] cipher: fix typo in error log * cipher/cipher.c (_gcry_cipher_encrypt): Fix log "cipher_decrypt: ..." to "cipher_encrypt: ...". -- Signed-off-by: Jussi Kivilinna --- cipher/cipher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cipher/cipher.c b/cipher/cipher.c index ab3e4240..bd571367 100644 --- a/cipher/cipher.c +++ b/cipher/cipher.c @@ -1125,7 +1125,7 @@ _gcry_cipher_encrypt (gcry_cipher_hd_t h, void *out, size_t outsize, if (h->mode != GCRY_CIPHER_MODE_NONE && !h->marks.key) { - log_error ("cipher_decrypt: key not set\n"); + log_error ("cipher_encrypt: key not set\n"); return GPG_ERR_MISSING_KEY; } From 0837d7e6be3e604c1f7b86d18c582d8aa7ed858c Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Mon, 23 Dec 2019 03:39:46 +0200 Subject: [PATCH 02/31] rijndael-ppc: fix bad register used for vector load/store assembly * cipher/rijndael-ppc.c (vec_aligned_ld, vec_load_be, vec_aligned_st) (vec_store_be): Add "r0" to clobber list for load/store instructions. -- Register r0 must not be used for RA input for vector load/store instructions as r0 is not read as register but as value '0'. 
Signed-off-by: Jussi Kivilinna --- cipher/rijndael-ppc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c index 7c349f8b..48a47edd 100644 --- a/cipher/rijndael-ppc.c +++ b/cipher/rijndael-ppc.c @@ -138,7 +138,7 @@ vec_aligned_ld(unsigned long offset, const unsigned char *ptr) __asm__ ("lvx %0,%1,%2\n\t" : "=v" (vec) : "r" (offset), "r" ((uintptr_t)ptr) - : "memory"); + : "memory", "r0"); return vec; #else return vec_vsx_ld (offset, ptr); @@ -169,7 +169,7 @@ vec_load_be(unsigned long offset, const unsigned char *ptr, __asm__ ("lxvw4x %x0,%1,%2\n\t" : "=wa" (vec) : "r" (offset), "r" ((uintptr_t)ptr) - : "memory"); + : "memory", "r0"); __asm__ ("vperm %0,%1,%1,%2\n\t" : "=v" (vec) : "v" (vec), "v" (be_bswap_const)); @@ -188,7 +188,7 @@ vec_aligned_st(block vec, unsigned long offset, unsigned char *ptr) __asm__ ("stvx %0,%1,%2\n\t" : : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr) - : "memory"); + : "memory", "r0"); #else vec_vsx_st (vec, offset, ptr); #endif @@ -208,7 +208,7 @@ vec_store_be(block vec, unsigned long offset, unsigned char *ptr, __asm__ ("stxvw4x %x0,%1,%2\n\t" : : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr) - : "memory"); + : "memory", "r0"); #else (void)be_bswap_const; vec_vsx_st (vec, offset, ptr); From 110077505acacae62cec3d09b32a084b9cee0368 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sun, 22 Dec 2019 16:44:07 +0200 Subject: [PATCH 03/31] rijndael-ppc: performance improvements * cipher/rijndael-ppc.c (ALIGNED_LOAD, ALIGNED_STORE, VEC_LOAD_BE) (VEC_STORE_BE): Rewrite. (VEC_BE_SWAP, VEC_LOAD_BE_NOSWAP, VEC_STORE_BE_NOSWAP): New. (PRELOAD_ROUND_KEYS, AES_ENCRYPT, AES_DECRYPT): Adjust to new input parameters for vector load macros. (ROUND_KEY_VARIABLES_ALL, PRELOAD_ROUND_KEYS_ALL) (AES_ENCRYPT_ALL): New. (vec_bswap32_const_neg): New. (vec_aligned_ld, vec_aligned_st, vec_load_be_const): Rename to... (asm_aligned_ls, asm_aligned_st, asm_load_be_const): ...these. 
(asm_be_swap, asm_vperm1, asm_load_be_noswap) (asm_store_be_noswap): New. (vec_add_uint128): Rename to... (asm_add_uint128): ...this. (asm_xor, asm_cipher_be, asm_cipherlast_be, asm_ncipher_be) (asm_ncipherlast_be): New inline assembly functions with volatile keyword to allow manual instruction ordering. (_gcry_aes_ppc8_setkey, aes_ppc8_prepare_decryption) (_gcry_aes_ppc8_encrypt, _gcry_aes_ppc8_decrypt) (_gcry_aes_ppc8_cfb_enc, _gcry_aes_ppc8_cbc_enc) (_gcry_aes_ppc8_ocb_auth): Update to use new&rewritten helper macros. (_gcry_aes_ppc8_cfb_dec, _gcry_aes_ppc8_cbc_dec) (_gcry_aes_ppc8_ctr_enc, _gcry_aes_ppc8_ocb_crypt) (_gcry_aes_ppc8_xts_crypt): Update to use new&rewritten helper macros; Tune 8-block parallel paths with manual instruction ordering. -- Benchmarks on POWER8 (ppc64le, ~3.8Ghz): Before: AES | nanosecs/byte mebibytes/sec cycles/byte CBC enc | 1.06 ns/B 902.2 MiB/s 4.02 c/B CBC dec | 0.208 ns/B 4585 MiB/s 0.790 c/B CFB enc | 1.06 ns/B 900.4 MiB/s 4.02 c/B CFB dec | 0.208 ns/B 4588 MiB/s 0.790 c/B CTR enc | 0.238 ns/B 4007 MiB/s 0.904 c/B CTR dec | 0.238 ns/B 4009 MiB/s 0.904 c/B XTS enc | 0.492 ns/B 1937 MiB/s 1.87 c/B XTS dec | 0.488 ns/B 1955 MiB/s 1.85 c/B OCB enc | 0.243 ns/B 3928 MiB/s 0.922 c/B OCB dec | 0.247 ns/B 3858 MiB/s 0.939 c/B OCB auth | 0.213 ns/B 4482 MiB/s 0.809 c/B After (cbc-dec & cfb-dec & xts & ocb ~6% faster, ctr ~11% faster): AES | nanosecs/byte mebibytes/sec cycles/byte CBC enc | 1.06 ns/B 902.1 MiB/s 4.02 c/B CBC dec | 0.196 ns/B 4877 MiB/s 0.743 c/B CFB enc | 1.06 ns/B 902.2 MiB/s 4.02 c/B CFB dec | 0.195 ns/B 4889 MiB/s 0.741 c/B CTR enc | 0.214 ns/B 4448 MiB/s 0.815 c/B CTR dec | 0.214 ns/B 4452 MiB/s 0.814 c/B XTS enc | 0.461 ns/B 2067 MiB/s 1.75 c/B XTS dec | 0.456 ns/B 2092 MiB/s 1.73 c/B OCB enc | 0.227 ns/B 4200 MiB/s 0.863 c/B OCB dec | 0.234 ns/B 4072 MiB/s 0.890 c/B OCB auth | 0.207 ns/B 4604 MiB/s 0.787 c/B Benchmarks on POWER9 (ppc64le, ~3.8Ghz): Before: AES | nanosecs/byte mebibytes/sec cycles/byte CBC enc | 1.04 
ns/B 918.7 MiB/s 3.94 c/B CBC dec | 0.240 ns/B 3982 MiB/s 0.910 c/B CFB enc | 1.04 ns/B 917.6 MiB/s 3.95 c/B CFB dec | 0.241 ns/B 3963 MiB/s 0.914 c/B CTR enc | 0.249 ns/B 3835 MiB/s 0.945 c/B CTR dec | 0.252 ns/B 3787 MiB/s 0.957 c/B XTS enc | 0.505 ns/B 1889 MiB/s 1.92 c/B XTS dec | 0.495 ns/B 1926 MiB/s 1.88 c/B OCB enc | 0.303 ns/B 3152 MiB/s 1.15 c/B OCB dec | 0.305 ns/B 3129 MiB/s 1.16 c/B OCB auth | 0.265 ns/B 3595 MiB/s 1.01 c/B After (cbc-dec & cfb-dec ~6% faster, ctr ~11% faster, ocb ~4% faster): AES | nanosecs/byte mebibytes/sec cycles/byte CBC enc | 1.04 ns/B 917.3 MiB/s 3.95 c/B CBC dec | 0.225 ns/B 4234 MiB/s 0.856 c/B CFB enc | 1.04 ns/B 917.8 MiB/s 3.95 c/B CFB dec | 0.226 ns/B 4214 MiB/s 0.860 c/B CTR enc | 0.221 ns/B 4306 MiB/s 0.842 c/B CTR dec | 0.223 ns/B 4271 MiB/s 0.848 c/B XTS enc | 0.503 ns/B 1897 MiB/s 1.91 c/B XTS dec | 0.495 ns/B 1928 MiB/s 1.88 c/B OCB enc | 0.288 ns/B 3309 MiB/s 1.10 c/B OCB dec | 0.292 ns/B 3266 MiB/s 1.11 c/B OCB auth | 0.267 ns/B 3570 MiB/s 1.02 c/B Signed-off-by: Jussi Kivilinna --- cipher/rijndael-ppc.c | 1839 +++++++++++++++++++++++++---------------- 1 file changed, 1112 insertions(+), 727 deletions(-) diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c index 48a47edd..a8bcae46 100644 --- a/cipher/rijndael-ppc.c +++ b/cipher/rijndael-ppc.c @@ -51,17 +51,27 @@ typedef union #define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE -#define ALIGNED_LOAD(in_ptr) \ - (vec_aligned_ld (0, (const unsigned char *)(in_ptr))) +#define ALIGNED_LOAD(in_ptr, offs) \ + (asm_aligned_ld ((offs) * 16, (const void *)(in_ptr))) -#define ALIGNED_STORE(out_ptr, vec) \ - (vec_aligned_st ((vec), 0, (unsigned char *)(out_ptr))) +#define ALIGNED_STORE(out_ptr, offs, vec) \ + (asm_aligned_st ((vec), (offs) * 16, (void *)(out_ptr))) -#define VEC_LOAD_BE(in_ptr, bige_const) \ - (vec_load_be (0, (const unsigned char *)(in_ptr), bige_const)) +#define VEC_BE_SWAP(vec, bige_const) (asm_be_swap ((vec), (bige_const))) -#define 
VEC_STORE_BE(out_ptr, vec, bige_const) \ - (vec_store_be ((vec), 0, (unsigned char *)(out_ptr), bige_const)) +#define VEC_LOAD_BE(in_ptr, offs, bige_const) \ + (asm_be_swap (asm_load_be_noswap ((offs) * 16, (const void *)(in_ptr)), \ + bige_const)) + +#define VEC_LOAD_BE_NOSWAP(in_ptr, offs) \ + (asm_load_be_noswap ((offs) * 16, (const unsigned char *)(in_ptr))) + +#define VEC_STORE_BE(out_ptr, offs, vec, bige_const) \ + (asm_store_be_noswap (asm_be_swap ((vec), (bige_const)), (offs) * 16, \ + (void *)(out_ptr))) + +#define VEC_STORE_BE_NOSWAP(out_ptr, offs, vec) \ + (asm_store_be_noswap ((vec), (offs) * 16, (void *)(out_ptr))) #define ROUND_KEY_VARIABLES \ @@ -69,166 +79,257 @@ typedef union #define PRELOAD_ROUND_KEYS(nrounds) \ do { \ - rkey0 = ALIGNED_LOAD(&rk[0]); \ - rkeylast = ALIGNED_LOAD(&rk[nrounds]); \ + rkey0 = ALIGNED_LOAD (rk, 0); \ + rkeylast = ALIGNED_LOAD (rk, nrounds); \ } while (0) - #define AES_ENCRYPT(blk, nrounds) \ do { \ blk ^= rkey0; \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[1])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[2])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[3])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[4])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[5])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[6])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[7])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[8])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[9])); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 1)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 2)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 3)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 4)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 5)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 6)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 7)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 8)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 9)); \ if (nrounds >= 12) \ { \ - blk = vec_cipher_be (blk, 
ALIGNED_LOAD(&rk[10])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[11])); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 10)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 11)); \ if (rounds > 12) \ { \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[12])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[13])); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 12)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 13)); \ } \ } \ - blk = vec_cipherlast_be (blk, rkeylast); \ + blk = asm_cipherlast_be (blk, rkeylast); \ } while (0) - #define AES_DECRYPT(blk, nrounds) \ do { \ blk ^= rkey0; \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[1])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[2])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[3])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[4])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[5])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[6])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[7])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[8])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[9])); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 1)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 2)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 3)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 4)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 5)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 6)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 7)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 8)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 9)); \ if (nrounds >= 12) \ { \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[10])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[11])); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 10)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 11)); \ if (rounds > 12) \ { \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[12])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[13])); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 
12)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 13)); \ } \ } \ - blk = vec_ncipherlast_be (blk, rkeylast); \ + blk = asm_ncipherlast_be (blk, rkeylast); \ } while (0) +#define ROUND_KEY_VARIABLES_ALL \ + block rkey0, rkey1, rkey2, rkey3, rkey4, rkey5, rkey6, rkey7, rkey8, \ + rkey9, rkey10, rkey11, rkey12, rkey13, rkeylast + +#define PRELOAD_ROUND_KEYS_ALL(nrounds) \ + do { \ + rkey0 = ALIGNED_LOAD (rk, 0); \ + rkey1 = ALIGNED_LOAD (rk, 1); \ + rkey2 = ALIGNED_LOAD (rk, 2); \ + rkey3 = ALIGNED_LOAD (rk, 3); \ + rkey4 = ALIGNED_LOAD (rk, 4); \ + rkey5 = ALIGNED_LOAD (rk, 5); \ + rkey6 = ALIGNED_LOAD (rk, 6); \ + rkey7 = ALIGNED_LOAD (rk, 7); \ + rkey8 = ALIGNED_LOAD (rk, 8); \ + rkey9 = ALIGNED_LOAD (rk, 9); \ + if (nrounds >= 12) \ + { \ + rkey10 = ALIGNED_LOAD (rk, 10); \ + rkey11 = ALIGNED_LOAD (rk, 11); \ + if (rounds > 12) \ + { \ + rkey12 = ALIGNED_LOAD (rk, 12); \ + rkey13 = ALIGNED_LOAD (rk, 13); \ + } \ + } \ + rkeylast = ALIGNED_LOAD (rk, nrounds); \ + } while (0) + +#define AES_ENCRYPT_ALL(blk, nrounds) \ + do { \ + blk ^= rkey0; \ + blk = asm_cipher_be (blk, rkey1); \ + blk = asm_cipher_be (blk, rkey2); \ + blk = asm_cipher_be (blk, rkey3); \ + blk = asm_cipher_be (blk, rkey4); \ + blk = asm_cipher_be (blk, rkey5); \ + blk = asm_cipher_be (blk, rkey6); \ + blk = asm_cipher_be (blk, rkey7); \ + blk = asm_cipher_be (blk, rkey8); \ + blk = asm_cipher_be (blk, rkey9); \ + if (nrounds >= 12) \ + { \ + blk = asm_cipher_be (blk, rkey10); \ + blk = asm_cipher_be (blk, rkey11); \ + if (rounds > 12) \ + { \ + blk = asm_cipher_be (blk, rkey12); \ + blk = asm_cipher_be (blk, rkey13); \ + } \ + } \ + blk = asm_cipherlast_be (blk, rkeylast); \ + } while (0) + + +#ifdef WORDS_BIGENDIAN static const block vec_bswap32_const = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }; +#else +static const block vec_bswap32_const_neg = + { ~3, ~2, ~1, ~0, ~7, ~6, ~5, ~4, ~11, ~10, ~9, ~8, ~15, ~14, ~13, ~12 }; +#endif static ASM_FUNC_ATTR_INLINE block 
-vec_aligned_ld(unsigned long offset, const unsigned char *ptr) +asm_aligned_ld(unsigned long offset, const void *ptr) { -#ifndef WORDS_BIGENDIAN block vec; - __asm__ ("lvx %0,%1,%2\n\t" - : "=v" (vec) - : "r" (offset), "r" ((uintptr_t)ptr) - : "memory", "r0"); + __asm__ volatile ("lvx %0,%1,%2\n\t" + : "=v" (vec) + : "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); return vec; -#else - return vec_vsx_ld (offset, ptr); -#endif } +static ASM_FUNC_ATTR_INLINE void +asm_aligned_st(block vec, unsigned long offset, void *ptr) +{ + __asm__ volatile ("stvx %0,%1,%2\n\t" + : + : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); +} static ASM_FUNC_ATTR_INLINE block -vec_load_be_const(void) +asm_load_be_const(void) { #ifndef WORDS_BIGENDIAN - return ~ALIGNED_LOAD(&vec_bswap32_const); + return ALIGNED_LOAD (&vec_bswap32_const_neg, 0); #else static const block vec_dummy = { 0 }; return vec_dummy; #endif } - static ASM_FUNC_ATTR_INLINE block -vec_load_be(unsigned long offset, const unsigned char *ptr, - block be_bswap_const) +asm_vperm1(block vec, block mask) { -#ifndef WORDS_BIGENDIAN - block vec; - /* GCC vec_vsx_ld is generating two instructions on little-endian. Use - * lxvw4x directly instead. 
*/ - __asm__ ("lxvw4x %x0,%1,%2\n\t" - : "=wa" (vec) - : "r" (offset), "r" ((uintptr_t)ptr) - : "memory", "r0"); - __asm__ ("vperm %0,%1,%1,%2\n\t" - : "=v" (vec) - : "v" (vec), "v" (be_bswap_const)); - return vec; -#else - (void)be_bswap_const; - return vec_vsx_ld (offset, ptr); -#endif + block o; + __asm__ volatile ("vperm %0,%1,%1,%2\n\t" + : "=v" (o) + : "v" (vec), "v" (mask)); + return o; } - -static ASM_FUNC_ATTR_INLINE void -vec_aligned_st(block vec, unsigned long offset, unsigned char *ptr) +static ASM_FUNC_ATTR_INLINE block +asm_be_swap(block vec, block be_bswap_const) { + (void)be_bswap_const; #ifndef WORDS_BIGENDIAN - __asm__ ("stvx %0,%1,%2\n\t" - : - : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr) - : "memory", "r0"); + return asm_vperm1 (vec, be_bswap_const); #else - vec_vsx_st (vec, offset, ptr); + return vec; #endif } +static ASM_FUNC_ATTR_INLINE block +asm_load_be_noswap(unsigned long offset, const void *ptr) +{ + block vec; + __asm__ volatile ("lxvw4x %x0,%1,%2\n\t" + : "=wa" (vec) + : "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); + /* NOTE: vec needs to be be-swapped using 'asm_be_swap' by caller */ + return vec; +} static ASM_FUNC_ATTR_INLINE void -vec_store_be(block vec, unsigned long offset, unsigned char *ptr, - block be_bswap_const) +asm_store_be_noswap(block vec, unsigned long offset, void *ptr) { -#ifndef WORDS_BIGENDIAN - /* GCC vec_vsx_st is generating two instructions on little-endian. Use - * stxvw4x directly instead. 
*/ - __asm__ ("vperm %0,%1,%1,%2\n\t" - : "=v" (vec) - : "v" (vec), "v" (be_bswap_const)); - __asm__ ("stxvw4x %x0,%1,%2\n\t" - : - : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr) - : "memory", "r0"); -#else - (void)be_bswap_const; - vec_vsx_st (vec, offset, ptr); -#endif + /* NOTE: vec be-swapped using 'asm_be_swap' by caller */ + __asm__ volatile ("stxvw4x %x0,%1,%2\n\t" + : + : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); } +static ASM_FUNC_ATTR_INLINE block +asm_add_uint128(block a, block b) +{ + block res; + __asm__ volatile ("vadduqm %0,%1,%2\n\t" + : "=v" (res) + : "v" (a), "v" (b)); + return res; +} static ASM_FUNC_ATTR_INLINE block -vec_add_uint128(block a, block b) +asm_xor(block a, block b) { -#if 1 block res; - /* Use assembly as GCC (v8.3) generates slow code for vec_vadduqm. */ - __asm__ ("vadduqm %0,%1,%2\n\t" - : "=v" (res) - : "v" (a), "v" (b)); + __asm__ volatile ("vxor %0,%1,%2\n\t" + : "=v" (res) + : "v" (a), "v" (b)); return res; -#else - return (block)vec_vadduqm((vector __uint128_t)a, (vector __uint128_t)b); -#endif +} + +static ASM_FUNC_ATTR_INLINE block +asm_cipher_be(block b, block rk) +{ + block o; + __asm__ volatile ("vcipher %0, %1, %2\n\t" + : "=v" (o) + : "v" (b), "v" (rk)); + return o; +} + +static ASM_FUNC_ATTR_INLINE block +asm_cipherlast_be(block b, block rk) +{ + block o; + __asm__ volatile ("vcipherlast %0, %1, %2\n\t" + : "=v" (o) + : "v" (b), "v" (rk)); + return o; +} + +static ASM_FUNC_ATTR_INLINE block +asm_ncipher_be(block b, block rk) +{ + block o; + __asm__ volatile ("vncipher %0, %1, %2\n\t" + : "=v" (o) + : "v" (b), "v" (rk)); + return o; +} + +static ASM_FUNC_ATTR_INLINE block +asm_ncipherlast_be(block b, block rk) +{ + block o; + __asm__ volatile ("vncipherlast %0, %1, %2\n\t" + : "=v" (o) + : "v" (b), "v" (rk)); + return o; } @@ -250,7 +351,7 @@ _gcry_aes_sbox4_ppc8(u32 fourbytes) void _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key) { - const block bige_const = 
vec_load_be_const(); + const block bige_const = asm_load_be_const(); union { PROPERLY_ALIGNED_TYPE dummy; @@ -345,11 +446,11 @@ _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key) for (r = 0; r <= rounds; r++) { #ifndef WORDS_BIGENDIAN - VEC_STORE_BE(&ekey[r], ALIGNED_LOAD(&ekey[r]), bige_const); + VEC_STORE_BE(ekey, r, ALIGNED_LOAD (ekey, r), bige_const); #else - block rvec = ALIGNED_LOAD(&ekey[r]); - ALIGNED_STORE(&ekey[r], - vec_perm(rvec, rvec, vec_bswap32_const)); + block rvec = ALIGNED_LOAD (ekey, r); + ALIGNED_STORE (ekey, r, + vec_perm(rvec, rvec, vec_bswap32_const)); (void)bige_const; #endif } @@ -378,7 +479,7 @@ aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx) rr = rounds; for (r = 0, rr = rounds; r <= rounds; r++, rr--) { - ALIGNED_STORE(&dkey[r], ALIGNED_LOAD(&ekey[rr])); + ALIGNED_STORE (dkey, r, ALIGNED_LOAD (ekey, rr)); } } @@ -394,18 +495,18 @@ unsigned int _gcry_aes_ppc8_encrypt (const RIJNDAEL_context *ctx, unsigned char *out, const unsigned char *in) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); const u128_t *rk = (u128_t *)&ctx->keyschenc; int rounds = ctx->rounds; ROUND_KEY_VARIABLES; block b; - b = VEC_LOAD_BE (in, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); AES_ENCRYPT (b, rounds); - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); return 0; /* does not use stack */ } @@ -415,18 +516,18 @@ unsigned int _gcry_aes_ppc8_decrypt (const RIJNDAEL_context *ctx, unsigned char *out, const unsigned char *in) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); const u128_t *rk = (u128_t *)&ctx->keyschdec; int rounds = ctx->rounds; ROUND_KEY_VARIABLES; block b; - b = VEC_LOAD_BE (in, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); AES_DECRYPT (b, rounds); - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); return 0; /* does 
not use stack */ } @@ -436,41 +537,41 @@ void _gcry_aes_ppc8_cfb_enc (void *context, unsigned char *iv_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *in = (const u128_t *)inbuf_arg; u128_t *out = (u128_t *)outbuf_arg; int rounds = ctx->rounds; - ROUND_KEY_VARIABLES; + ROUND_KEY_VARIABLES_ALL; block rkeylast_orig; block iv; - iv = VEC_LOAD_BE (iv_arg, bige_const); + iv = VEC_LOAD_BE (iv_arg, 0, bige_const); - PRELOAD_ROUND_KEYS (rounds); + PRELOAD_ROUND_KEYS_ALL (rounds); rkeylast_orig = rkeylast; for (; nblocks; nblocks--) { - rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, bige_const); + rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const); - AES_ENCRYPT (iv, rounds); + AES_ENCRYPT_ALL (iv, rounds); - VEC_STORE_BE (out, iv, bige_const); + VEC_STORE_BE (out, 0, iv, bige_const); out++; in++; } - VEC_STORE_BE (iv_arg, iv, bige_const); + VEC_STORE_BE (iv_arg, 0, iv, bige_const); } void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *in = (const u128_t *)inbuf_arg; @@ -483,7 +584,7 @@ void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, block b0, b1, b2, b3, b4, b5, b6, b7; block rkey; - iv = VEC_LOAD_BE (iv_arg, bige_const); + iv = VEC_LOAD_BE (iv_arg, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); rkeylast_orig = rkeylast; @@ -491,34 +592,42 @@ void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, for (; nblocks >= 8; nblocks -= 8) { in0 = iv; - in1 = VEC_LOAD_BE (in + 0, bige_const); - in2 = VEC_LOAD_BE (in + 1, bige_const); - in3 = VEC_LOAD_BE (in + 2, bige_const); - in4 = 
VEC_LOAD_BE (in + 3, bige_const); - in5 = VEC_LOAD_BE (in + 4, bige_const); - in6 = VEC_LOAD_BE (in + 5, bige_const); - in7 = VEC_LOAD_BE (in + 6, bige_const); - iv = VEC_LOAD_BE (in + 7, bige_const); - - b0 = rkey0 ^ in0; - b1 = rkey0 ^ in1; - b2 = rkey0 ^ in2; - b3 = rkey0 ^ in3; - b4 = rkey0 ^ in4; - b5 = rkey0 ^ in5; - b6 = rkey0 ^ in6; - b7 = rkey0 ^ in7; + in1 = VEC_LOAD_BE_NOSWAP (in, 0); + in2 = VEC_LOAD_BE_NOSWAP (in, 1); + in3 = VEC_LOAD_BE_NOSWAP (in, 2); + in4 = VEC_LOAD_BE_NOSWAP (in, 3); + in1 = VEC_BE_SWAP (in1, bige_const); + in2 = VEC_BE_SWAP (in2, bige_const); + in5 = VEC_LOAD_BE_NOSWAP (in, 4); + in6 = VEC_LOAD_BE_NOSWAP (in, 5); + in3 = VEC_BE_SWAP (in3, bige_const); + in4 = VEC_BE_SWAP (in4, bige_const); + in7 = VEC_LOAD_BE_NOSWAP (in, 6); + iv = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + in5 = VEC_BE_SWAP (in5, bige_const); + in6 = VEC_BE_SWAP (in6, bige_const); + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + in7 = VEC_BE_SWAP (in7, bige_const); + iv = VEC_BE_SWAP (iv, bige_const); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); + b4 = asm_xor (rkey0, in4); + b5 = asm_xor (rkey0, in5); + b6 = asm_xor (rkey0, in6); + b7 = asm_xor (rkey0, in7); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD(&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); \ - b4 = vec_cipher_be (b4, rkey); \ - b5 = vec_cipher_be (b5, rkey); \ - b6 = vec_cipher_be (b6, rkey); \ - b7 = vec_cipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -542,48 +651,60 @@ void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, #undef DO_ROUND - rkey = rkeylast; - 
b0 = vec_cipherlast_be (b0, rkey ^ in1); - b1 = vec_cipherlast_be (b1, rkey ^ in2); - b2 = vec_cipherlast_be (b2, rkey ^ in3); - b3 = vec_cipherlast_be (b3, rkey ^ in4); - b4 = vec_cipherlast_be (b4, rkey ^ in5); - b5 = vec_cipherlast_be (b5, rkey ^ in6); - b6 = vec_cipherlast_be (b6, rkey ^ in7); - b7 = vec_cipherlast_be (b7, rkey ^ iv); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); - - in += 8; + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + in4 = asm_xor (rkeylast, in4); + b0 = asm_cipherlast_be (b0, in1); + b1 = asm_cipherlast_be (b1, in2); + in5 = asm_xor (rkeylast, in5); + in6 = asm_xor (rkeylast, in6); + b2 = asm_cipherlast_be (b2, in3); + b3 = asm_cipherlast_be (b3, in4); + in7 = asm_xor (rkeylast, in7); + in0 = asm_xor (rkeylast, iv); + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_cipherlast_be (b4, in5); + b5 = asm_cipherlast_be (b5, in6); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_cipherlast_be (b6, in7); + b7 = asm_cipherlast_be (b7, in0); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { in0 = iv; - in1 = VEC_LOAD_BE (in + 0, bige_const); - in2 = VEC_LOAD_BE (in + 1, bige_const); - in3 = VEC_LOAD_BE (in + 2, bige_const); - iv 
= VEC_LOAD_BE (in + 3, bige_const); + in1 = VEC_LOAD_BE (in, 0, bige_const); + in2 = VEC_LOAD_BE (in, 1, bige_const); + in3 = VEC_LOAD_BE (in, 2, bige_const); + iv = VEC_LOAD_BE (in, 3, bige_const); - b0 = rkey0 ^ in0; - b1 = rkey0 ^ in1; - b2 = rkey0 ^ in2; - b3 = rkey0 ^ in3; + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD(&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -607,16 +728,18 @@ void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey ^ in1); - b1 = vec_cipherlast_be (b1, rkey ^ in2); - b2 = vec_cipherlast_be (b2, rkey ^ in3); - b3 = vec_cipherlast_be (b3, rkey ^ iv); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + in0 = asm_xor (rkeylast, iv); + b0 = asm_cipherlast_be (b0, in1); + b1 = asm_cipherlast_be (b1, in2); + b2 = asm_cipherlast_be (b2, in3); + b3 = asm_cipherlast_be (b3, in0); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; @@ -625,20 +748,20 @@ void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, for (; nblocks; nblocks--) { - bin = VEC_LOAD_BE (in, bige_const); + bin = VEC_LOAD_BE (in, 0, bige_const); rkeylast = rkeylast_orig ^ bin; b = iv; iv = bin; AES_ENCRYPT (b, rounds); - VEC_STORE_BE 
(out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); out++; in++; } - VEC_STORE_BE (iv_arg, iv, bige_const); + VEC_STORE_BE (iv_arg, 0, iv, bige_const); } @@ -646,41 +769,41 @@ void _gcry_aes_ppc8_cbc_enc (void *context, unsigned char *iv_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *in = (const u128_t *)inbuf_arg; u128_t *out = (u128_t *)outbuf_arg; int rounds = ctx->rounds; - ROUND_KEY_VARIABLES; + ROUND_KEY_VARIABLES_ALL; block lastiv, b; + unsigned int outadd = !cbc_mac; - lastiv = VEC_LOAD_BE (iv_arg, bige_const); + lastiv = VEC_LOAD_BE (iv_arg, 0, bige_const); - PRELOAD_ROUND_KEYS (rounds); + PRELOAD_ROUND_KEYS_ALL (rounds); for (; nblocks; nblocks--) { - b = lastiv ^ VEC_LOAD_BE (in, bige_const); + b = lastiv ^ VEC_LOAD_BE (in, 0, bige_const); - AES_ENCRYPT (b, rounds); + AES_ENCRYPT_ALL (b, rounds); lastiv = b; - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); in++; - if (!cbc_mac) - out++; + out += outadd; } - VEC_STORE_BE (iv_arg, lastiv, bige_const); + VEC_STORE_BE (iv_arg, 0, lastiv, bige_const); } void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschdec; const u128_t *in = (const u128_t *)inbuf_arg; @@ -699,41 +822,49 @@ void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg, ctx->decryption_prepared = 1; } - iv = VEC_LOAD_BE (iv_arg, bige_const); + iv = VEC_LOAD_BE (iv_arg, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); rkeylast_orig = rkeylast; for (; nblocks >= 8; nblocks -= 8) { - in0 = VEC_LOAD_BE (in + 0, bige_const); - in1 = VEC_LOAD_BE (in + 1, 
bige_const); - in2 = VEC_LOAD_BE (in + 2, bige_const); - in3 = VEC_LOAD_BE (in + 3, bige_const); - in4 = VEC_LOAD_BE (in + 4, bige_const); - in5 = VEC_LOAD_BE (in + 5, bige_const); - in6 = VEC_LOAD_BE (in + 6, bige_const); - in7 = VEC_LOAD_BE (in + 7, bige_const); - - b0 = rkey0 ^ in0; - b1 = rkey0 ^ in1; - b2 = rkey0 ^ in2; - b3 = rkey0 ^ in3; - b4 = rkey0 ^ in4; - b5 = rkey0 ^ in5; - b6 = rkey0 ^ in6; - b7 = rkey0 ^ in7; + in0 = VEC_LOAD_BE_NOSWAP (in, 0); + in1 = VEC_LOAD_BE_NOSWAP (in, 1); + in2 = VEC_LOAD_BE_NOSWAP (in, 2); + in3 = VEC_LOAD_BE_NOSWAP (in, 3); + in0 = VEC_BE_SWAP (in0, bige_const); + in1 = VEC_BE_SWAP (in1, bige_const); + in4 = VEC_LOAD_BE_NOSWAP (in, 4); + in5 = VEC_LOAD_BE_NOSWAP (in, 5); + in2 = VEC_BE_SWAP (in2, bige_const); + in3 = VEC_BE_SWAP (in3, bige_const); + in6 = VEC_LOAD_BE_NOSWAP (in, 6); + in7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + in4 = VEC_BE_SWAP (in4, bige_const); + in5 = VEC_BE_SWAP (in5, bige_const); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); + in6 = VEC_BE_SWAP (in6, bige_const); + in7 = VEC_BE_SWAP (in7, bige_const); + b4 = asm_xor (rkey0, in4); + b5 = asm_xor (rkey0, in5); + b6 = asm_xor (rkey0, in6); + b7 = asm_xor (rkey0, in7); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD(&rk[r]); \ - b0 = vec_ncipher_be (b0, rkey); \ - b1 = vec_ncipher_be (b1, rkey); \ - b2 = vec_ncipher_be (b2, rkey); \ - b3 = vec_ncipher_be (b3, rkey); \ - b4 = vec_ncipher_be (b4, rkey); \ - b5 = vec_ncipher_be (b5, rkey); \ - b6 = vec_ncipher_be (b6, rkey); \ - b7 = vec_ncipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); \ + b4 = asm_ncipher_be (b4, rkey); \ + b5 = asm_ncipher_be (b5, rkey); \ + b6 = asm_ncipher_be (b6, rkey); \ + b7 = asm_ncipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -757,48 +888,60 @@ void 
_gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_ncipherlast_be (b0, rkey ^ iv); - b1 = vec_ncipherlast_be (b1, rkey ^ in0); - b2 = vec_ncipherlast_be (b2, rkey ^ in1); - b3 = vec_ncipherlast_be (b3, rkey ^ in2); - b4 = vec_ncipherlast_be (b4, rkey ^ in3); - b5 = vec_ncipherlast_be (b5, rkey ^ in4); - b6 = vec_ncipherlast_be (b6, rkey ^ in5); - b7 = vec_ncipherlast_be (b7, rkey ^ in6); + iv = asm_xor (rkeylast, iv); + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + b0 = asm_ncipherlast_be (b0, iv); iv = in7; - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); - - in += 8; + b1 = asm_ncipherlast_be (b1, in0); + in3 = asm_xor (rkeylast, in3); + in4 = asm_xor (rkeylast, in4); + b2 = asm_ncipherlast_be (b2, in1); + b3 = asm_ncipherlast_be (b3, in2); + in5 = asm_xor (rkeylast, in5); + in6 = asm_xor (rkeylast, in6); + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_ncipherlast_be (b4, in3); + b5 = asm_ncipherlast_be (b5, in4); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_ncipherlast_be (b6, in5); + b7 = asm_ncipherlast_be (b7, in6); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { - in0 = 
VEC_LOAD_BE (in + 0, bige_const); - in1 = VEC_LOAD_BE (in + 1, bige_const); - in2 = VEC_LOAD_BE (in + 2, bige_const); - in3 = VEC_LOAD_BE (in + 3, bige_const); + in0 = VEC_LOAD_BE (in, 0, bige_const); + in1 = VEC_LOAD_BE (in, 1, bige_const); + in2 = VEC_LOAD_BE (in, 2, bige_const); + in3 = VEC_LOAD_BE (in, 3, bige_const); - b0 = rkey0 ^ in0; - b1 = rkey0 ^ in1; - b2 = rkey0 ^ in2; - b3 = rkey0 ^ in3; + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD(&rk[r]); \ - b0 = vec_ncipher_be (b0, rkey); \ - b1 = vec_ncipher_be (b1, rkey); \ - b2 = vec_ncipher_be (b2, rkey); \ - b3 = vec_ncipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -822,17 +965,21 @@ void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_ncipherlast_be (b0, rkey ^ iv); - b1 = vec_ncipherlast_be (b1, rkey ^ in0); - b2 = vec_ncipherlast_be (b2, rkey ^ in1); - b3 = vec_ncipherlast_be (b3, rkey ^ in2); + iv = asm_xor (rkeylast, iv); + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + + b0 = asm_ncipherlast_be (b0, iv); iv = in3; + b1 = asm_ncipherlast_be (b1, in0); + b2 = asm_ncipherlast_be (b2, in1); + b3 = asm_ncipherlast_be (b3, in2); - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; @@ -843,17 +990,17 @@ void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg, { rkeylast = rkeylast_orig ^ iv; - iv = VEC_LOAD_BE 
(in, bige_const); + iv = VEC_LOAD_BE (in, 0, bige_const); b = iv; AES_DECRYPT (b, rounds); - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); in++; out++; } - VEC_STORE_BE (iv_arg, iv, bige_const); + VEC_STORE_BE (iv_arg, 0, iv, bige_const); } @@ -863,7 +1010,7 @@ void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, { static const unsigned char vec_one_const[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *in = (const u128_t *)inbuf_arg; @@ -873,56 +1020,80 @@ void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, block rkeylast_orig; block ctr, b, one; - ctr = VEC_LOAD_BE (ctr_arg, bige_const); - one = VEC_LOAD_BE (&vec_one_const, bige_const); + ctr = VEC_LOAD_BE (ctr_arg, 0, bige_const); + one = VEC_LOAD_BE (&vec_one_const, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); rkeylast_orig = rkeylast; if (nblocks >= 4) { + block in0, in1, in2, in3, in4, in5, in6, in7; block b0, b1, b2, b3, b4, b5, b6, b7; block two, three, four; - block ctr4; block rkey; - two = vec_add_uint128 (one, one); - three = vec_add_uint128 (two, one); - four = vec_add_uint128 (two, two); + two = asm_add_uint128 (one, one); + three = asm_add_uint128 (two, one); + four = asm_add_uint128 (two, two); for (; nblocks >= 8; nblocks -= 8) { - ctr4 = vec_add_uint128 (ctr, four); - b0 = rkey0 ^ ctr; - b1 = rkey0 ^ vec_add_uint128 (ctr, one); - b2 = rkey0 ^ vec_add_uint128 (ctr, two); - b3 = rkey0 ^ vec_add_uint128 (ctr, three); - b4 = rkey0 ^ ctr4; - b5 = rkey0 ^ vec_add_uint128 (ctr4, one); - b6 = rkey0 ^ vec_add_uint128 (ctr4, two); - b7 = rkey0 ^ vec_add_uint128 (ctr4, three); - ctr = vec_add_uint128 (ctr4, four); + b1 = asm_add_uint128 (ctr, one); + b2 = asm_add_uint128 (ctr, two); + b3 = asm_add_uint128 (ctr, three); + b4 = asm_add_uint128 (ctr, four); 
+ b5 = asm_add_uint128 (b1, four); + b6 = asm_add_uint128 (b2, four); + b7 = asm_add_uint128 (b3, four); + b0 = asm_xor (rkey0, ctr); + rkey = ALIGNED_LOAD (rk, 1); + ctr = asm_add_uint128 (b4, four); + b1 = asm_xor (rkey0, b1); + b2 = asm_xor (rkey0, b2); + b3 = asm_xor (rkey0, b3); + b0 = asm_cipher_be (b0, rkey); + b1 = asm_cipher_be (b1, rkey); + b2 = asm_cipher_be (b2, rkey); + b3 = asm_cipher_be (b3, rkey); + b4 = asm_xor (rkey0, b4); + b5 = asm_xor (rkey0, b5); + b6 = asm_xor (rkey0, b6); + b7 = asm_xor (rkey0, b7); + b4 = asm_cipher_be (b4, rkey); + b5 = asm_cipher_be (b5, rkey); + b6 = asm_cipher_be (b6, rkey); + b7 = asm_cipher_be (b7, rkey); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD(&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); \ - b4 = vec_cipher_be (b4, rkey); \ - b5 = vec_cipher_be (b5, rkey); \ - b6 = vec_cipher_be (b6, rkey); \ - b7 = vec_cipher_be (b7, rkey); - - DO_ROUND(1); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); + + in0 = VEC_LOAD_BE_NOSWAP (in, 0); DO_ROUND(2); + in1 = VEC_LOAD_BE_NOSWAP (in, 1); DO_ROUND(3); + in2 = VEC_LOAD_BE_NOSWAP (in, 2); DO_ROUND(4); + in3 = VEC_LOAD_BE_NOSWAP (in, 3); DO_ROUND(5); + in4 = VEC_LOAD_BE_NOSWAP (in, 4); DO_ROUND(6); + in5 = VEC_LOAD_BE_NOSWAP (in, 5); DO_ROUND(7); + in6 = VEC_LOAD_BE_NOSWAP (in, 6); DO_ROUND(8); + in7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; DO_ROUND(9); + if (rounds >= 12) { DO_ROUND(10); @@ -936,43 +1107,68 @@ void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey ^ VEC_LOAD_BE (in + 0, bige_const)); - b1 = vec_cipherlast_be (b1, rkey ^ 
VEC_LOAD_BE (in + 1, bige_const)); - b2 = vec_cipherlast_be (b2, rkey ^ VEC_LOAD_BE (in + 2, bige_const)); - b3 = vec_cipherlast_be (b3, rkey ^ VEC_LOAD_BE (in + 3, bige_const)); - b4 = vec_cipherlast_be (b4, rkey ^ VEC_LOAD_BE (in + 4, bige_const)); - b5 = vec_cipherlast_be (b5, rkey ^ VEC_LOAD_BE (in + 5, bige_const)); - b6 = vec_cipherlast_be (b6, rkey ^ VEC_LOAD_BE (in + 6, bige_const)); - b7 = vec_cipherlast_be (b7, rkey ^ VEC_LOAD_BE (in + 7, bige_const)); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); - - in += 8; + in0 = VEC_BE_SWAP (in0, bige_const); + in1 = VEC_BE_SWAP (in1, bige_const); + in2 = VEC_BE_SWAP (in2, bige_const); + in3 = VEC_BE_SWAP (in3, bige_const); + in4 = VEC_BE_SWAP (in4, bige_const); + in5 = VEC_BE_SWAP (in5, bige_const); + in6 = VEC_BE_SWAP (in6, bige_const); + in7 = VEC_BE_SWAP (in7, bige_const); + + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + b0 = asm_cipherlast_be (b0, in0); + b1 = asm_cipherlast_be (b1, in1); + in4 = asm_xor (rkeylast, in4); + in5 = asm_xor (rkeylast, in5); + b2 = asm_cipherlast_be (b2, in2); + b3 = asm_cipherlast_be (b3, in3); + in6 = asm_xor (rkeylast, in6); + in7 = asm_xor (rkeylast, in7); + b4 = asm_cipherlast_be (b4, in4); + b5 = asm_cipherlast_be (b5, in5); + b6 = asm_cipherlast_be (b6, in6); + b7 = asm_cipherlast_be (b7, in7); + + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + 
VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { - b0 = rkey0 ^ ctr; - b1 = rkey0 ^ vec_add_uint128 (ctr, one); - b2 = rkey0 ^ vec_add_uint128 (ctr, two); - b3 = rkey0 ^ vec_add_uint128 (ctr, three); - ctr = vec_add_uint128 (ctr, four); + b1 = asm_add_uint128 (ctr, one); + b2 = asm_add_uint128 (ctr, two); + b3 = asm_add_uint128 (ctr, three); + b0 = asm_xor (rkey0, ctr); + ctr = asm_add_uint128 (ctr, four); + b1 = asm_xor (rkey0, b1); + b2 = asm_xor (rkey0, b2); + b3 = asm_xor (rkey0, b3); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD(&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -982,6 +1178,12 @@ void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, DO_ROUND(6); DO_ROUND(7); DO_ROUND(8); + + in0 = VEC_LOAD_BE (in, 0, bige_const); + in1 = VEC_LOAD_BE (in, 1, bige_const); + in2 = VEC_LOAD_BE (in, 2, bige_const); + in3 = VEC_LOAD_BE (in, 3, bige_const); + DO_ROUND(9); if (rounds >= 12) { @@ -996,16 +1198,21 @@ void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey ^ VEC_LOAD_BE (in + 0, bige_const)); - b1 = vec_cipherlast_be (b1, rkey ^ VEC_LOAD_BE (in + 1, bige_const)); - b2 = vec_cipherlast_be (b2, rkey ^ VEC_LOAD_BE (in + 2, bige_const)); - b3 = vec_cipherlast_be (b3, rkey ^ VEC_LOAD_BE (in + 3, bige_const)); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE 
(out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + + b0 = asm_cipherlast_be (b0, in0); + b1 = asm_cipherlast_be (b1, in1); + b2 = asm_cipherlast_be (b2, in2); + b3 = asm_cipherlast_be (b3, in3); + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + in += 4; out += 4; nblocks -= 4; @@ -1015,18 +1222,18 @@ void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, for (; nblocks; nblocks--) { b = ctr; - ctr = vec_add_uint128 (ctr, one); - rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, bige_const); + ctr = asm_add_uint128 (ctr, one); + rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const); AES_ENCRYPT (b, rounds); - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); out++; in++; } - VEC_STORE_BE (ctr_arg, ctr, bige_const); + VEC_STORE_BE (ctr_arg, 0, ctr, bige_const); } @@ -1034,7 +1241,7 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = (void *)&c->context.c; const u128_t *in = (const u128_t *)inbuf_arg; u128_t *out = (u128_t *)outbuf_arg; @@ -1043,16 +1250,16 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, block l0, l1, l2, l; block b0, b1, b2, b3, b4, b5, b6, b7, b; block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7; - block rkey; + block rkey, rkeylf; block ctr, iv; ROUND_KEY_VARIABLES; - iv = VEC_LOAD_BE (c->u_iv.iv, bige_const); - ctr = VEC_LOAD_BE (c->u_ctr.ctr, bige_const); + iv = VEC_LOAD_BE (c->u_iv.iv, 0, bige_const); + ctr = VEC_LOAD_BE (c->u_ctr.ctr, 0, bige_const); - l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], bige_const); - l1 = VEC_LOAD_BE 
(c->u_mode.ocb.L[1], bige_const); - l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], bige_const); + l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const); + l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const); + l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const); if (encrypt) { @@ -1062,8 +1269,8 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, for (; nblocks >= 8 && data_nblocks % 8; nblocks--) { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); - b = VEC_LOAD_BE (in, bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; @@ -1074,7 +1281,7 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, AES_ENCRYPT (b, rounds); b ^= iv; - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); in += 1; out += 1; @@ -1082,16 +1289,25 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, for (; nblocks >= 8; nblocks -= 8) { - b0 = VEC_LOAD_BE (in + 0, bige_const); - b1 = VEC_LOAD_BE (in + 1, bige_const); - b2 = VEC_LOAD_BE (in + 2, bige_const); - b3 = VEC_LOAD_BE (in + 3, bige_const); - b4 = VEC_LOAD_BE (in + 4, bige_const); - b5 = VEC_LOAD_BE (in + 5, bige_const); - b6 = VEC_LOAD_BE (in + 6, bige_const); - b7 = VEC_LOAD_BE (in + 7, bige_const); - - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), bige_const); + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0); + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + b2 = VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + 
b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + l = VEC_BE_SWAP(l, bige_const); ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; @@ -1117,15 +1333,15 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, iv = iv7 ^ rkey0; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); \ - b4 = vec_cipher_be (b4, rkey); \ - b5 = vec_cipher_be (b5, rkey); \ - b6 = vec_cipher_be (b6, rkey); \ - b7 = vec_cipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1134,7 +1350,20 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + DO_ROUND(8); + + iv0 = asm_xor (rkeylf, iv0); + iv1 = asm_xor (rkeylf, iv1); + iv2 = asm_xor (rkeylf, iv2); + iv3 = asm_xor (rkeylf, iv3); + iv4 = asm_xor (rkeylf, iv4); + iv5 = asm_xor (rkeylf, iv5); + iv6 = asm_xor (rkeylf, iv6); + iv7 = asm_xor (rkeylf, iv7); + DO_ROUND(9); if (rounds >= 12) { @@ -1149,37 +1378,42 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, #undef DO_ROUND - rkey = rkeylast ^ rkey0; - b0 = vec_cipherlast_be (b0, rkey ^ iv0); - b1 = vec_cipherlast_be (b1, rkey ^ iv1); - b2 = vec_cipherlast_be (b2, rkey ^ iv2); - b3 = vec_cipherlast_be (b3, rkey ^ iv3); - b4 = vec_cipherlast_be (b4, rkey ^ iv4); - b5 = vec_cipherlast_be (b5, rkey ^ iv5); - b6 = vec_cipherlast_be (b6, rkey ^ iv6); - b7 = vec_cipherlast_be (b7, rkey ^ iv7); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, 
b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); - - in += 8; + b0 = asm_cipherlast_be (b0, iv0); + b1 = asm_cipherlast_be (b1, iv1); + b2 = asm_cipherlast_be (b2, iv2); + b3 = asm_cipherlast_be (b3, iv3); + b4 = asm_cipherlast_be (b4, iv4); + b5 = asm_cipherlast_be (b5, iv5); + b6 = asm_cipherlast_be (b6, iv6); + b7 = asm_cipherlast_be (b7, iv7); + + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4 && (data_nblocks % 4) == 0) { - b0 = VEC_LOAD_BE (in + 0, bige_const); - b1 = VEC_LOAD_BE (in + 1, bige_const); - b2 = VEC_LOAD_BE (in + 2, bige_const); - b3 = VEC_LOAD_BE (in + 3, bige_const); + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); ctr ^= b0 ^ b1 ^ b2 ^ b3; @@ -1197,11 +1431,11 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, iv = iv3 ^ rkey0; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + 
b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1226,15 +1460,15 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, #undef DO_ROUND rkey = rkeylast ^ rkey0; - b0 = vec_cipherlast_be (b0, rkey ^ iv0); - b1 = vec_cipherlast_be (b1, rkey ^ iv1); - b2 = vec_cipherlast_be (b2, rkey ^ iv2); - b3 = vec_cipherlast_be (b3, rkey ^ iv3); + b0 = asm_cipherlast_be (b0, rkey ^ iv0); + b1 = asm_cipherlast_be (b1, rkey ^ iv1); + b2 = asm_cipherlast_be (b2, rkey ^ iv2); + b3 = asm_cipherlast_be (b3, rkey ^ iv3); - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; @@ -1243,8 +1477,8 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, for (; nblocks; nblocks--) { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); - b = VEC_LOAD_BE (in, bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; @@ -1255,7 +1489,7 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, AES_ENCRYPT (b, rounds); b ^= iv; - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); in += 1; out += 1; @@ -1275,8 +1509,8 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, for (; nblocks >= 8 && data_nblocks % 8; nblocks--) { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); - b = VEC_LOAD_BE (in, bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= 
l; @@ -1287,7 +1521,7 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, /* Checksum_i = Checksum_{i-1} xor P_i */ ctr ^= b; - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); in += 1; out += 1; @@ -1295,16 +1529,25 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, for (; nblocks >= 8; nblocks -= 8) { - b0 = VEC_LOAD_BE (in + 0, bige_const); - b1 = VEC_LOAD_BE (in + 1, bige_const); - b2 = VEC_LOAD_BE (in + 2, bige_const); - b3 = VEC_LOAD_BE (in + 3, bige_const); - b4 = VEC_LOAD_BE (in + 4, bige_const); - b5 = VEC_LOAD_BE (in + 5, bige_const); - b6 = VEC_LOAD_BE (in + 6, bige_const); - b7 = VEC_LOAD_BE (in + 7, bige_const); - - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), bige_const); + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0); + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + b2 = VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + l = VEC_BE_SWAP(l, bige_const); iv ^= rkey0; @@ -1328,15 +1571,15 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, iv = iv7 ^ rkey0; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_ncipher_be (b0, rkey); \ - b1 = vec_ncipher_be (b1, rkey); \ - b2 = vec_ncipher_be (b2, rkey); \ - b3 = vec_ncipher_be (b3, rkey); \ - b4 = vec_ncipher_be (b4, rkey); \ - b5 = vec_ncipher_be (b5, rkey); \ - b6 = vec_ncipher_be (b6, rkey); \ - b7 = vec_ncipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = 
asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); \ + b4 = asm_ncipher_be (b4, rkey); \ + b5 = asm_ncipher_be (b5, rkey); \ + b6 = asm_ncipher_be (b6, rkey); \ + b7 = asm_ncipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1345,7 +1588,20 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + DO_ROUND(8); + + iv0 = asm_xor (rkeylf, iv0); + iv1 = asm_xor (rkeylf, iv1); + iv2 = asm_xor (rkeylf, iv2); + iv3 = asm_xor (rkeylf, iv3); + iv4 = asm_xor (rkeylf, iv4); + iv5 = asm_xor (rkeylf, iv5); + iv6 = asm_xor (rkeylf, iv6); + iv7 = asm_xor (rkeylf, iv7); + DO_ROUND(9); if (rounds >= 12) { @@ -1360,39 +1616,44 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, #undef DO_ROUND - rkey = rkeylast ^ rkey0; - b0 = vec_ncipherlast_be (b0, rkey ^ iv0); - b1 = vec_ncipherlast_be (b1, rkey ^ iv1); - b2 = vec_ncipherlast_be (b2, rkey ^ iv2); - b3 = vec_ncipherlast_be (b3, rkey ^ iv3); - b4 = vec_ncipherlast_be (b4, rkey ^ iv4); - b5 = vec_ncipherlast_be (b5, rkey ^ iv5); - b6 = vec_ncipherlast_be (b6, rkey ^ iv6); - b7 = vec_ncipherlast_be (b7, rkey ^ iv7); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); + b0 = asm_ncipherlast_be (b0, iv0); + b1 = asm_ncipherlast_be (b1, iv1); + b2 = asm_ncipherlast_be (b2, iv2); + b3 = asm_ncipherlast_be (b3, iv3); + b4 = asm_ncipherlast_be (b4, iv4); + b5 = asm_ncipherlast_be (b5, iv5); + b6 = asm_ncipherlast_be (b6, iv6); + b7 = asm_ncipherlast_be (b7, iv7); ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; - in += 8; + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + 
b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4 && (data_nblocks % 4) == 0) { - b0 = VEC_LOAD_BE (in + 0, bige_const); - b1 = VEC_LOAD_BE (in + 1, bige_const); - b2 = VEC_LOAD_BE (in + 2, bige_const); - b3 = VEC_LOAD_BE (in + 3, bige_const); + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); iv ^= rkey0; @@ -1408,11 +1669,11 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, iv = iv3 ^ rkey0; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_ncipher_be (b0, rkey); \ - b1 = vec_ncipher_be (b1, rkey); \ - b2 = vec_ncipher_be (b2, rkey); \ - b3 = vec_ncipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1437,15 +1698,15 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, #undef DO_ROUND rkey = rkeylast ^ rkey0; - b0 = vec_ncipherlast_be (b0, rkey ^ iv0); - b1 = vec_ncipherlast_be (b1, rkey ^ iv1); - b2 = vec_ncipherlast_be (b2, rkey ^ iv2); - b3 = vec_ncipherlast_be (b3, rkey ^ iv3); + b0 = asm_ncipherlast_be (b0, rkey ^ iv0); + b1 = asm_ncipherlast_be (b1, rkey ^ iv1); + b2 = asm_ncipherlast_be (b2, rkey ^ iv2); + b3 
= asm_ncipherlast_be (b3, rkey ^ iv3); - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); ctr ^= b0 ^ b1 ^ b2 ^ b3; @@ -1456,8 +1717,8 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, for (; nblocks; nblocks--) { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); - b = VEC_LOAD_BE (in, bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; @@ -1468,15 +1729,15 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, /* Checksum_i = Checksum_{i-1} xor P_i */ ctr ^= b; - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); in += 1; out += 1; } } - VEC_STORE_BE (c->u_iv.iv, iv, bige_const); - VEC_STORE_BE (c->u_ctr.ctr, ctr, bige_const); + VEC_STORE_BE (c->u_iv.iv, 0, iv, bige_const); + VEC_STORE_BE (c->u_ctr.ctr, 0, ctr, bige_const); c->u_mode.ocb.data_nblocks = data_nblocks; return 0; @@ -1485,7 +1746,7 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = (void *)&c->context.c; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *abuf = (const u128_t *)abuf_arg; @@ -1498,19 +1759,19 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, block ctr, iv; ROUND_KEY_VARIABLES; - iv = VEC_LOAD_BE (c->u_mode.ocb.aad_offset, bige_const); - ctr = VEC_LOAD_BE (c->u_mode.ocb.aad_sum, bige_const); + iv = VEC_LOAD_BE (c->u_mode.ocb.aad_offset, 0, bige_const); + ctr = 
VEC_LOAD_BE (c->u_mode.ocb.aad_sum, 0, bige_const); - l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], bige_const); - l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], bige_const); - l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], bige_const); + l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const); + l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const); + l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const); PRELOAD_ROUND_KEYS (rounds); for (; nblocks >= 8 && data_nblocks % 8; nblocks--) { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); - b = VEC_LOAD_BE (abuf, bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (abuf, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; @@ -1524,16 +1785,16 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, for (; nblocks >= 8; nblocks -= 8) { - b0 = VEC_LOAD_BE (abuf + 0, bige_const); - b1 = VEC_LOAD_BE (abuf + 1, bige_const); - b2 = VEC_LOAD_BE (abuf + 2, bige_const); - b3 = VEC_LOAD_BE (abuf + 3, bige_const); - b4 = VEC_LOAD_BE (abuf + 4, bige_const); - b5 = VEC_LOAD_BE (abuf + 5, bige_const); - b6 = VEC_LOAD_BE (abuf + 6, bige_const); - b7 = VEC_LOAD_BE (abuf + 7, bige_const); + b0 = VEC_LOAD_BE (abuf, 0, bige_const); + b1 = VEC_LOAD_BE (abuf, 1, bige_const); + b2 = VEC_LOAD_BE (abuf, 2, bige_const); + b3 = VEC_LOAD_BE (abuf, 3, bige_const); + b4 = VEC_LOAD_BE (abuf, 4, bige_const); + b5 = VEC_LOAD_BE (abuf, 5, bige_const); + b6 = VEC_LOAD_BE (abuf, 6, bige_const); + b7 = VEC_LOAD_BE (abuf, 7, bige_const); - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), 0, bige_const); frkey = rkey0; iv ^= frkey; @@ -1558,15 +1819,15 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, iv = iv7 ^ frkey; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); 
\ - b4 = vec_cipher_be (b4, rkey); \ - b5 = vec_cipher_be (b5, rkey); \ - b6 = vec_cipher_be (b6, rkey); \ - b7 = vec_cipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1591,14 +1852,14 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, #undef DO_ROUND rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey); - b1 = vec_cipherlast_be (b1, rkey); - b2 = vec_cipherlast_be (b2, rkey); - b3 = vec_cipherlast_be (b3, rkey); - b4 = vec_cipherlast_be (b4, rkey); - b5 = vec_cipherlast_be (b5, rkey); - b6 = vec_cipherlast_be (b6, rkey); - b7 = vec_cipherlast_be (b7, rkey); + b0 = asm_cipherlast_be (b0, rkey); + b1 = asm_cipherlast_be (b1, rkey); + b2 = asm_cipherlast_be (b2, rkey); + b3 = asm_cipherlast_be (b3, rkey); + b4 = asm_cipherlast_be (b4, rkey); + b5 = asm_cipherlast_be (b5, rkey); + b6 = asm_cipherlast_be (b6, rkey); + b7 = asm_cipherlast_be (b7, rkey); ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; @@ -1607,12 +1868,12 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, if (nblocks >= 4 && (data_nblocks % 4) == 0) { - b0 = VEC_LOAD_BE (abuf + 0, bige_const); - b1 = VEC_LOAD_BE (abuf + 1, bige_const); - b2 = VEC_LOAD_BE (abuf + 2, bige_const); - b3 = VEC_LOAD_BE (abuf + 3, bige_const); + b0 = VEC_LOAD_BE (abuf, 0, bige_const); + b1 = VEC_LOAD_BE (abuf, 1, bige_const); + b2 = VEC_LOAD_BE (abuf, 2, bige_const); + b3 = VEC_LOAD_BE (abuf, 3, bige_const); - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); frkey = rkey0; iv ^= frkey; @@ -1629,11 +1890,11 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, iv = iv3 ^ frkey; #define 
DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1658,10 +1919,10 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, #undef DO_ROUND rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey); - b1 = vec_cipherlast_be (b1, rkey); - b2 = vec_cipherlast_be (b2, rkey); - b3 = vec_cipherlast_be (b3, rkey); + b0 = asm_cipherlast_be (b0, rkey); + b1 = asm_cipherlast_be (b1, rkey); + b2 = asm_cipherlast_be (b2, rkey); + b3 = asm_cipherlast_be (b3, rkey); ctr ^= b0 ^ b1 ^ b2 ^ b3; @@ -1671,8 +1932,8 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, for (; nblocks; nblocks--) { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); - b = VEC_LOAD_BE (abuf, bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (abuf, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; @@ -1684,8 +1945,8 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, abuf += 1; } - VEC_STORE_BE (c->u_mode.ocb.aad_offset, iv, bige_const); - VEC_STORE_BE (c->u_mode.ocb.aad_sum, ctr, bige_const); + VEC_STORE_BE (c->u_mode.ocb.aad_offset, 0, iv, bige_const); + VEC_STORE_BE (c->u_mode.ocb.aad_sum, 0, ctr, bige_const); c->u_mode.ocb.aad_nblocks = data_nblocks; return 0; @@ -1696,44 +1957,59 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { +#ifdef WORDS_BIGENDIAN static const block vec_bswap64_const = - { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }; + { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 }; static const block vec_bswap128_const = { 
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; +#else + static const block vec_bswap64_const = + { ~8, ~9, ~10, ~11, ~12, ~13, ~14, ~15, ~0, ~1, ~2, ~3, ~4, ~5, ~6, ~7 }; + static const block vec_bswap128_const = + { ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8, ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0 }; + static const block vec_tweakin_swap_const = + { ~12, ~13, ~14, ~15, ~8, ~9, ~10, ~11, ~4, ~5, ~6, ~7, ~0, ~1, ~2, ~3 }; +#endif static const unsigned char vec_tweak_const[16] = { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0x87 }; static const vector unsigned long long vec_shift63_const = { 63, 63 }; static const vector unsigned long long vec_shift1_const = { 1, 1 }; - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *in = (const u128_t *)inbuf_arg; u128_t *out = (u128_t *)outbuf_arg; int rounds = ctx->rounds; - block tweak_tmp, tweak_next, tweak; - block b0, b1, b2, b3, b4, b5, b6, b7, b, rkey; + block tweak; + block b0, b1, b2, b3, b4, b5, b6, b7, b, rkey, rkeylf; block tweak0, tweak1, tweak2, tweak3, tweak4, tweak5, tweak6, tweak7; block tweak_const, bswap64_const, bswap128_const; vector unsigned long long shift63_const, shift1_const; ROUND_KEY_VARIABLES; - tweak_const = VEC_LOAD_BE (&vec_tweak_const, bige_const); - bswap64_const = ALIGNED_LOAD (&vec_bswap64_const); - bswap128_const = ALIGNED_LOAD (&vec_bswap128_const); - shift63_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift63_const); - shift1_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift1_const); + tweak_const = VEC_LOAD_BE (&vec_tweak_const, 0, bige_const); + bswap64_const = ALIGNED_LOAD (&vec_bswap64_const, 0); + bswap128_const = ALIGNED_LOAD (&vec_bswap128_const, 0); + shift63_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift63_const, 0); + shift1_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift1_const, 0); - tweak_next = VEC_LOAD_BE (tweak_arg, bige_const); +#ifdef 
WORDS_BIGENDIAN + tweak = VEC_LOAD_BE (tweak_arg, 0, bige_const); + tweak = asm_vperm1 (tweak, bswap128_const); +#else + tweak = VEC_LOAD_BE (tweak_arg, 0, vec_tweakin_swap_const); +#endif -#define GEN_TWEAK(tweak, tmp) /* Generate next tweak. */ \ - tmp = vec_vperm(tweak, tweak, bswap64_const); \ - tweak = vec_vperm(tweak, tweak, bswap128_const); \ - tmp = (block)(vec_sra((vector unsigned long long)tmp, shift63_const)) & \ - tweak_const; \ - tweak = (block)vec_sl((vector unsigned long long)tweak, shift1_const); \ - tweak = tweak ^ tmp; \ - tweak = vec_vperm(tweak, tweak, bswap128_const); +#define GEN_TWEAK(tout, tin) /* Generate next tweak. */ \ + do { \ + block tmp1, tmp2; \ + tmp1 = asm_vperm1((tin), bswap64_const); \ + tmp2 = (block)vec_sl((vector unsigned long long)(tin), shift1_const); \ + tmp1 = (block)(vec_sra((vector unsigned long long)tmp1, shift63_const)) & \ + tweak_const; \ + tout = asm_xor(tmp1, tmp2); \ + } while (0) if (encrypt) { @@ -1743,42 +2019,70 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, for (; nblocks >= 8; nblocks -= 8) { - tweak0 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak1 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak2 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak3 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak4 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak5 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak6 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak7 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - - b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0; - b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0; - b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0; - b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0; - b4 = VEC_LOAD_BE (in + 4, bige_const) ^ tweak4 ^ rkey0; - b5 = VEC_LOAD_BE (in + 5, bige_const) ^ tweak5 ^ rkey0; - b6 = VEC_LOAD_BE (in + 6, bige_const) ^ tweak6 ^ rkey0; - b7 = 
VEC_LOAD_BE (in + 7, bige_const) ^ tweak7 ^ rkey0; + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + tweak0 = asm_vperm1 (tweak0, bswap128_const); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + GEN_TWEAK (tweak2, tweak1); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + GEN_TWEAK (tweak3, tweak2); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + GEN_TWEAK (tweak4, tweak3); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + b2 = VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + GEN_TWEAK (tweak5, tweak4); + tweak4 = asm_vperm1 (tweak4, bswap128_const); + GEN_TWEAK (tweak6, tweak5); + tweak5 = asm_vperm1 (tweak5, bswap128_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + GEN_TWEAK (tweak7, tweak6); + tweak6 = asm_vperm1 (tweak6, bswap128_const); + GEN_TWEAK (tweak, tweak7); + tweak7 = asm_vperm1 (tweak7, bswap128_const); + b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + + tweak0 = asm_xor (tweak0, rkey0); + tweak1 = asm_xor (tweak1, rkey0); + tweak2 = asm_xor (tweak2, rkey0); + tweak3 = asm_xor (tweak3, rkey0); + tweak4 = asm_xor (tweak4, rkey0); + tweak5 = asm_xor (tweak5, rkey0); + tweak6 = asm_xor (tweak6, rkey0); + tweak7 = asm_xor (tweak7, rkey0); + + b0 = asm_xor (b0, tweak0); + b1 = asm_xor (b1, tweak1); + b2 = asm_xor (b2, tweak2); + b3 = asm_xor (b3, tweak3); + b4 = asm_xor (b4, tweak4); + b5 = asm_xor (b5, tweak5); + b6 = asm_xor (b6, tweak6); + b7 = asm_xor (b7, tweak7); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); \ - b4 = 
vec_cipher_be (b4, rkey); \ - b5 = vec_cipher_be (b5, rkey); \ - b6 = vec_cipher_be (b6, rkey); \ - b7 = vec_cipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1787,7 +2091,20 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + DO_ROUND(8); + + tweak0 = asm_xor (tweak0, rkeylf); + tweak1 = asm_xor (tweak1, rkeylf); + tweak2 = asm_xor (tweak2, rkeylf); + tweak3 = asm_xor (tweak3, rkeylf); + tweak4 = asm_xor (tweak4, rkeylf); + tweak5 = asm_xor (tweak5, rkeylf); + tweak6 = asm_xor (tweak6, rkeylf); + tweak7 = asm_xor (tweak7, rkeylf); + DO_ROUND(9); if (rounds >= 12) { @@ -1802,51 +2119,62 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey ^ tweak0); - b1 = vec_cipherlast_be (b1, rkey ^ tweak1); - b2 = vec_cipherlast_be (b2, rkey ^ tweak2); - b3 = vec_cipherlast_be (b3, rkey ^ tweak3); - b4 = vec_cipherlast_be (b4, rkey ^ tweak4); - b5 = vec_cipherlast_be (b5, rkey ^ tweak5); - b6 = vec_cipherlast_be (b6, rkey ^ tweak6); - b7 = vec_cipherlast_be (b7, rkey ^ tweak7); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); - - in += 8; + b0 = asm_cipherlast_be (b0, tweak0); + b1 = asm_cipherlast_be (b1, tweak1); + b2 = asm_cipherlast_be (b2, tweak2); + b3 = asm_cipherlast_be (b3, tweak3); + 
b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_cipherlast_be (b4, tweak4); + b5 = asm_cipherlast_be (b5, tweak5); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_cipherlast_be (b6, tweak6); + b7 = asm_cipherlast_be (b7, tweak7); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { - tweak0 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak1 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak2 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak3 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - - b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0; - b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0; - b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0; - b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0; + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + GEN_TWEAK (tweak2, tweak1); + GEN_TWEAK (tweak3, tweak2); + GEN_TWEAK (tweak, tweak3); + + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); + + tweak0 = asm_vperm1 (tweak0, bswap128_const); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + + b0 ^= tweak0 ^ rkey0; + b1 ^= tweak1 ^ rkey0; + b2 ^= tweak2 ^ rkey0; + b3 ^= tweak3 ^ rkey0; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - 
b3 = vec_cipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1871,15 +2199,15 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, #undef DO_ROUND rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey ^ tweak0); - b1 = vec_cipherlast_be (b1, rkey ^ tweak1); - b2 = vec_cipherlast_be (b2, rkey ^ tweak2); - b3 = vec_cipherlast_be (b3, rkey ^ tweak3); + b0 = asm_cipherlast_be (b0, rkey ^ tweak0); + b1 = asm_cipherlast_be (b1, rkey ^ tweak1); + b2 = asm_cipherlast_be (b2, rkey ^ tweak2); + b3 = asm_cipherlast_be (b3, rkey ^ tweak3); - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; @@ -1888,18 +2216,18 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, for (; nblocks; nblocks--) { - tweak = tweak_next; + tweak0 = asm_vperm1 (tweak, bswap128_const); /* Xor-Encrypt/Decrypt-Xor block. */ - b = VEC_LOAD_BE (in, bige_const) ^ tweak; + b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0; /* Generate next tweak. 
*/ - GEN_TWEAK (tweak_next, tweak_tmp); + GEN_TWEAK (tweak, tweak); AES_ENCRYPT (b, rounds); - b ^= tweak; - VEC_STORE_BE (out, b, bige_const); + b ^= tweak0; + VEC_STORE_BE (out, 0, b, bige_const); in++; out++; @@ -1919,42 +2247,70 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, for (; nblocks >= 8; nblocks -= 8) { - tweak0 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak1 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak2 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak3 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak4 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak5 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak6 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak7 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - - b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0; - b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0; - b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0; - b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0; - b4 = VEC_LOAD_BE (in + 4, bige_const) ^ tweak4 ^ rkey0; - b5 = VEC_LOAD_BE (in + 5, bige_const) ^ tweak5 ^ rkey0; - b6 = VEC_LOAD_BE (in + 6, bige_const) ^ tweak6 ^ rkey0; - b7 = VEC_LOAD_BE (in + 7, bige_const) ^ tweak7 ^ rkey0; + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + tweak0 = asm_vperm1 (tweak0, bswap128_const); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + GEN_TWEAK (tweak2, tweak1); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + GEN_TWEAK (tweak3, tweak2); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + GEN_TWEAK (tweak4, tweak3); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + b2 = 
VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + GEN_TWEAK (tweak5, tweak4); + tweak4 = asm_vperm1 (tweak4, bswap128_const); + GEN_TWEAK (tweak6, tweak5); + tweak5 = asm_vperm1 (tweak5, bswap128_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + GEN_TWEAK (tweak7, tweak6); + tweak6 = asm_vperm1 (tweak6, bswap128_const); + GEN_TWEAK (tweak, tweak7); + tweak7 = asm_vperm1 (tweak7, bswap128_const); + b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + + tweak0 = asm_xor (tweak0, rkey0); + tweak1 = asm_xor (tweak1, rkey0); + tweak2 = asm_xor (tweak2, rkey0); + tweak3 = asm_xor (tweak3, rkey0); + tweak4 = asm_xor (tweak4, rkey0); + tweak5 = asm_xor (tweak5, rkey0); + tweak6 = asm_xor (tweak6, rkey0); + tweak7 = asm_xor (tweak7, rkey0); + + b0 = asm_xor (b0, tweak0); + b1 = asm_xor (b1, tweak1); + b2 = asm_xor (b2, tweak2); + b3 = asm_xor (b3, tweak3); + b4 = asm_xor (b4, tweak4); + b5 = asm_xor (b5, tweak5); + b6 = asm_xor (b6, tweak6); + b7 = asm_xor (b7, tweak7); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_ncipher_be (b0, rkey); \ - b1 = vec_ncipher_be (b1, rkey); \ - b2 = vec_ncipher_be (b2, rkey); \ - b3 = vec_ncipher_be (b3, rkey); \ - b4 = vec_ncipher_be (b4, rkey); \ - b5 = vec_ncipher_be (b5, rkey); \ - b6 = vec_ncipher_be (b6, rkey); \ - b7 = vec_ncipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); \ + b4 = asm_ncipher_be (b4, rkey); \ + b5 = asm_ncipher_be (b5, rkey); \ + b6 = asm_ncipher_be (b6, rkey); \ + b7 = asm_ncipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1963,7 +2319,20 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + DO_ROUND(8); + + tweak0 = asm_xor (tweak0, rkeylf); + tweak1 = asm_xor (tweak1, rkeylf); + 
tweak2 = asm_xor (tweak2, rkeylf); + tweak3 = asm_xor (tweak3, rkeylf); + tweak4 = asm_xor (tweak4, rkeylf); + tweak5 = asm_xor (tweak5, rkeylf); + tweak6 = asm_xor (tweak6, rkeylf); + tweak7 = asm_xor (tweak7, rkeylf); + DO_ROUND(9); if (rounds >= 12) { @@ -1978,51 +2347,62 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_ncipherlast_be (b0, rkey ^ tweak0); - b1 = vec_ncipherlast_be (b1, rkey ^ tweak1); - b2 = vec_ncipherlast_be (b2, rkey ^ tweak2); - b3 = vec_ncipherlast_be (b3, rkey ^ tweak3); - b4 = vec_ncipherlast_be (b4, rkey ^ tweak4); - b5 = vec_ncipherlast_be (b5, rkey ^ tweak5); - b6 = vec_ncipherlast_be (b6, rkey ^ tweak6); - b7 = vec_ncipherlast_be (b7, rkey ^ tweak7); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); - - in += 8; + b0 = asm_ncipherlast_be (b0, tweak0); + b1 = asm_ncipherlast_be (b1, tweak1); + b2 = asm_ncipherlast_be (b2, tweak2); + b3 = asm_ncipherlast_be (b3, tweak3); + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_ncipherlast_be (b4, tweak4); + b5 = asm_ncipherlast_be (b5, tweak5); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_ncipherlast_be (b6, tweak6); + b7 = asm_ncipherlast_be (b7, tweak7); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + 
VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { - tweak0 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak1 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak2 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak3 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - - b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0; - b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0; - b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0; - b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0; + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + GEN_TWEAK (tweak2, tweak1); + GEN_TWEAK (tweak3, tweak2); + GEN_TWEAK (tweak, tweak3); + + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); + + tweak0 = asm_vperm1 (tweak0, bswap128_const); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + + b0 ^= tweak0 ^ rkey0; + b1 ^= tweak1 ^ rkey0; + b2 ^= tweak2 ^ rkey0; + b3 ^= tweak3 ^ rkey0; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_ncipher_be (b0, rkey); \ - b1 = vec_ncipher_be (b1, rkey); \ - b2 = vec_ncipher_be (b2, rkey); \ - b3 = vec_ncipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -2047,15 +2427,15 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, #undef DO_ROUND rkey = rkeylast; - b0 = vec_ncipherlast_be (b0, rkey ^ tweak0); - b1 = vec_ncipherlast_be (b1, rkey ^ tweak1); - b2 = vec_ncipherlast_be (b2, rkey ^ tweak2); - b3 = vec_ncipherlast_be (b3, rkey ^ tweak3); + b0 = asm_ncipherlast_be (b0, rkey ^ tweak0); + b1 = asm_ncipherlast_be (b1, rkey ^ tweak1); + b2 = asm_ncipherlast_be (b2, rkey ^ tweak2); + b3 = 
asm_ncipherlast_be (b3, rkey ^ tweak3); - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; @@ -2064,25 +2444,30 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, for (; nblocks; nblocks--) { - tweak = tweak_next; + tweak0 = asm_vperm1 (tweak, bswap128_const); /* Xor-Encrypt/Decrypt-Xor block. */ - b = VEC_LOAD_BE (in, bige_const) ^ tweak; + b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0; /* Generate next tweak. */ - GEN_TWEAK (tweak_next, tweak_tmp); + GEN_TWEAK (tweak, tweak); AES_DECRYPT (b, rounds); - b ^= tweak; - VEC_STORE_BE (out, b, bige_const); + b ^= tweak0; + VEC_STORE_BE (out, 0, b, bige_const); in++; out++; } } - VEC_STORE_BE (tweak_arg, tweak_next, bige_const); +#ifdef WORDS_BIGENDIAN + tweak = asm_vperm1 (tweak, bswap128_const); + VEC_STORE_BE (tweak_arg, 0, tweak, bige_const); +#else + VEC_STORE_BE (tweak_arg, 0, tweak, vec_tweakin_swap_const); +#endif #undef GEN_TWEAK } From f34de41b9db5412f253b9b984522427638aae115 Mon Sep 17 00:00:00 2001 From: NIIBE Yutaka Date: Thu, 16 Jan 2020 14:38:29 +0900 Subject: [PATCH 04/31] Register DCO for Tianjia Zhang. 
-- Signed-off-by: NIIBE Yutaka --- AUTHORS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/AUTHORS b/AUTHORS index 280d1303..8271c0d8 100644 --- a/AUTHORS +++ b/AUTHORS @@ -199,6 +199,9 @@ Shawn Landden Stephan Mueller 2014-08-22:2008899.25OeoelVVA@myon.chronox.de: +Tianjia Zhang +2020-01-08:dcda0127-2f45-93a3-0736-27259a33bffa@linux.alibaba.com: + TomĂ¡Å¡ MrĂ¡z 2012-04-16:1334571250.5056.52.camel@vespa.frost.loc: From d154c1e9e11019980253f0a65758932cd0656470 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Sun, 22 Dec 2019 17:20:11 +0800 Subject: [PATCH 05/31] Add new curve named sm2p256v1. * cipher/ecc-curves.c (domain_parms): Add sm2p256v1 for SM2. * tests/curves.c (N_CURVES): Update N_CURVES for SM2. Signed-off-by: Tianjia Zhang --- cipher/ecc-curves.c | 14 ++++++++++++++ tests/curves.c | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cipher/ecc-curves.c b/cipher/ecc-curves.c index 52872c5e..1592d23a 100644 --- a/cipher/ecc-curves.c +++ b/cipher/ecc-curves.c @@ -115,6 +115,8 @@ static const struct { "secp256k1", "1.3.132.0.10" }, + { "sm2p256v1", "1.2.156.10197.1.301" }, + { NULL, NULL} }; @@ -512,6 +514,18 @@ static const ecc_domain_parms_t domain_parms[] = 1 }, + { + "sm2p256v1", 256, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xfffffffeffffffffffffffffffffffffffffffff00000000ffffffffffffffff", + "0xfffffffeffffffffffffffffffffffffffffffff00000000fffffffffffffffc", + "0x28e9fa9e9d9f5e344d5a9e4bcf6509a7f39789f515ab8f92ddbcbd414d940e93", + "0xfffffffeffffffffffffffffffffffff7203df6b21c6052b53bbf40939d54123", + "0x32c4ae2c1f1981195f9904466a39c9948fe30bbff2660be1715a4589334c74c7", + "0xbc3736a2f4f6779c59bdcee36b692153d0a9877cc62a474002df32e52139f0a0", + 1 + }, + { NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL } }; diff --git a/tests/curves.c b/tests/curves.c index ff244bd1..0dfa2acb 100644 --- a/tests/curves.c +++ b/tests/curves.c @@ -33,7 +33,7 @@ #include "t-common.h" /* Number of curves defined in ../cipger/ecc-curves.c */ 
-#define N_CURVES 25 +#define N_CURVES 26 /* A real world sample public key. */ static char const sample_key_1[] = From 176a5f162acd0cfebc5517d061205681bc3658d0 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Sun, 19 Jan 2020 08:38:54 +0200 Subject: [PATCH 06/31] Update .gitignore Signed-off-by: Tianjia Zhang --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 704d3ca0..99741c18 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,8 @@ cipher/libcipher.la compat/Makefile compat/libcompat.la doc/gcrypt.info +doc/gcrypt.info-1 +doc/gcrypt.info-2 doc/stamp-vti doc/version.texi doc/Makefile @@ -65,6 +67,7 @@ src/gcrypt.h src/hmac256 src/libgcrypt-config src/libgcrypt.la +src/libgcrypt.pc src/mpicalc src/versioninfo.rc src/*.exe @@ -103,6 +106,8 @@ tests/t-lock tests/t-mpi-bit tests/t-mpi-point tests/t-sexp +tests/t-secmem +tests/t-x448 tests/tsexp tests/version tests/*.exe From 43cfc1632dd3a9579a906f31cd3b6c88d242d1a5 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Sun, 19 Jan 2020 08:38:54 +0200 Subject: [PATCH 07/31] ecc: Wrong flag and elements_enc fix. * cipher/ecc.c (ecc_generate): Fix wrong flag and elements_enc. -- Signed-off-by: Tianjia Zhang --- cipher/ecc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cipher/ecc.c b/cipher/ecc.c index 921510cc..10e11243 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -577,7 +577,7 @@ ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) (&curve_flags, NULL, ((flags & PUBKEY_FLAG_PARAM) && (flags & PUBKEY_FLAG_EDDSA))? "(flags param eddsa)" : - ((flags & PUBKEY_FLAG_PARAM) && (flags & PUBKEY_FLAG_EDDSA))? + ((flags & PUBKEY_FLAG_PARAM) && (flags & PUBKEY_FLAG_DJB_TWEAK))? "(flags param djb-tweak)" : ((flags & PUBKEY_FLAG_PARAM))? "(flags param)" : ((flags & PUBKEY_FLAG_EDDSA))? 
@@ -1712,7 +1712,7 @@ gcry_pk_spec_t _gcry_pubkey_spec_ecc = GCRY_PK_ECC, { 0, 1 }, (GCRY_PK_USAGE_SIGN | GCRY_PK_USAGE_ENCR), "ECC", ecc_names, - "pabgnhq", "pabgnhqd", "sw", "rs", "pabgnhq", + "pabgnhq", "pabgnhqd", "se", "rs", "pabgnhq", ecc_generate, ecc_check_secret_key, ecc_encrypt_raw, From 7e3aac7ba49b3b6e6c5ebe7c880b5b323c423ef7 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Sun, 19 Jan 2020 08:38:54 +0200 Subject: [PATCH 08/31] mpi: Fix error that point is not initialized * cipher/ecc-curves.c (mpi_ec_get_elliptic_curve): Initialize E->G point -- Signed-off-by: Tianjia Zhang --- cipher/ecc-curves.c | 1 + 1 file changed, 1 insertion(+) diff --git a/cipher/ecc-curves.c b/cipher/ecc-curves.c index 1592d23a..92850ac7 100644 --- a/cipher/ecc-curves.c +++ b/cipher/ecc-curves.c @@ -1058,6 +1058,7 @@ mpi_ec_get_elliptic_curve (elliptic_curve_t *E, int *r_flags, goto leave; if (G) { + _gcry_mpi_point_init (&E->G); mpi_point_set (&E->G, G->x, G->y, G->z); mpi_point_set (G, NULL, NULL, NULL); mpi_point_release (G); From 5ebb2f0671c902863eee91cbcfc85a72be506410 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sun, 19 Jan 2020 08:38:54 +0200 Subject: [PATCH 09/31] gcrypt.texi: fix GCRYCTL_GET_ALGO_NENCR typo * doc/gcrypt.texi: Fix GCRYCTL_GET_ALGO_NENC to GCRYCTL_GET_ALGO_NENCR. -- Signed-off-by: Jussi Kivilinna --- doc/gcrypt.texi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi index d7bfa4c2..091704de 100644 --- a/doc/gcrypt.texi +++ b/doc/gcrypt.texi @@ -2824,7 +2824,7 @@ Return the number of elements a signature created with the algorithm @var{algo} consists of. Return 0 for an unknown algorithm or for an algorithm not capable of creating signatures. -@item GCRYCTL_GET_ALGO_NENC +@item GCRYCTL_GET_ALGO_NENCR Return the number of elements a encrypted message created with the algorithm @var{algo} consists of. Return 0 for an unknown algorithm or for an algorithm not capable of encryption.
From 4997139b3e83761c9af0246cec829305c3d7d13b Mon Sep 17 00:00:00 2001 From: NIIBE Yutaka Date: Tue, 21 Jan 2020 12:05:23 +0900 Subject: [PATCH 10/31] Avoid use of ulong in internal code. * configure.ac (HAVE_ULONG_TYPEDEF): Remove. * mpi/mpi-div.c (_gcry_mpi_fdiv_r_ui): Use unsigned long. (_gcry_mpi_divisible_ui): Likewise. * random/rndunix.c (_gcry_rndunix_gather_random): Likewise. * random/rndw32.c (_gcry_rndw32_gather_random_fast): Likewise. (ADDINT): Likewise. * random/rndw32ce.c (_gcry_rndw32ce_gather_random_fast): Likewise. * src/mpi.h: Follow the change. * src/types.h (HAVE_ULONG_TYPEDEF): Remove. Signed-off-by: NIIBE Yutaka --- configure.ac | 1 - mpi/mpi-div.c | 7 ++++--- random/rndunix.c | 4 ++-- random/rndw32.c | 4 ++-- random/rndw32ce.c | 2 +- src/mpi.h | 4 ++-- src/types.h | 6 ------ 7 files changed, 11 insertions(+), 17 deletions(-) diff --git a/configure.ac b/configure.ac index 4d4fb49a..57884162 100644 --- a/configure.ac +++ b/configure.ac @@ -834,7 +834,6 @@ AC_TYPE_PID_T GNUPG_CHECK_TYPEDEF(byte, HAVE_BYTE_TYPEDEF) GNUPG_CHECK_TYPEDEF(ushort, HAVE_USHORT_TYPEDEF) -GNUPG_CHECK_TYPEDEF(ulong, HAVE_ULONG_TYPEDEF) GNUPG_CHECK_TYPEDEF(u16, HAVE_U16_TYPEDEF) GNUPG_CHECK_TYPEDEF(u32, HAVE_U32_TYPEDEF) diff --git a/mpi/mpi-div.c b/mpi/mpi-div.c index 9ac99c31..eb621fe4 100644 --- a/mpi/mpi-div.c +++ b/mpi/mpi-div.c @@ -64,8 +64,9 @@ _gcry_mpi_fdiv_r( gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor ) * rem is optional */ -ulong -_gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend, ulong divisor ) +unsigned long +_gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend, + unsigned long divisor ) { mpi_limb_t rlimb; @@ -321,7 +322,7 @@ _gcry_mpi_tdiv_q_2exp( gcry_mpi_t w, gcry_mpi_t u, unsigned int count ) * (note: divisor must fit into a limb) */ int -_gcry_mpi_divisible_ui(gcry_mpi_t dividend, ulong divisor ) +_gcry_mpi_divisible_ui(gcry_mpi_t dividend, unsigned long divisor ) { return !_gcry_mpih_mod_1( dividend->d, dividend->nlimbs, divisor 
); } diff --git a/random/rndunix.c b/random/rndunix.c index fcb45b78..aff2f85d 100644 --- a/random/rndunix.c +++ b/random/rndunix.c @@ -894,7 +894,7 @@ _gcry_rndunix_gather_random (void (*add)(const void*, size_t, /* now read from the gatherer */ while( length ) { int goodness; - ulong subtract; + unsigned long subtract; if( read_a_msg( pipedes[0], &msg ) ) { log_error("reading from gatherer pipe failed: %s\n", @@ -928,7 +928,7 @@ _gcry_rndunix_gather_random (void (*add)(const void*, size_t, (*add)( msg.data, n, origin ); /* this is the trick how we cope with the goodness */ - subtract = (ulong)n * goodness / 100; + subtract = (unsigned long)n * goodness / 100; /* subtract at least 1 byte to avoid infinite loops */ length -= subtract ? subtract : 1; } diff --git a/random/rndw32.c b/random/rndw32.c index 08a8867d..b3f63d20 100644 --- a/random/rndw32.c +++ b/random/rndw32.c @@ -845,10 +845,10 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, We discard the upper 32-bit of those values. 
*/ { - byte buffer[20*sizeof(ulong)], *bufptr; + byte buffer[20*sizeof(unsigned long)], *bufptr; bufptr = buffer; -#define ADDINT(f) do { ulong along = (ulong)(f); \ +#define ADDINT(f) do { unsigned long along = (unsigned long)(f); \ memcpy (bufptr, &along, sizeof (along) ); \ bufptr += sizeof (along); \ } while (0) diff --git a/random/rndw32ce.c b/random/rndw32ce.c index b485eef2..873e8460 100644 --- a/random/rndw32ce.c +++ b/random/rndw32ce.c @@ -115,7 +115,7 @@ _gcry_rndw32ce_gather_random_fast (void (*add)(const void*, size_t, memcpy (bufptr, &along, sizeof (along)); \ bufptr += sizeof (along); \ } while (0) - unsigned char buffer[20*sizeof(ulong)], *bufptr; + unsigned char buffer[20*sizeof(unsigned long)], *bufptr; bufptr = buffer; ADD (HWND, GetActiveWindow ()); diff --git a/src/mpi.h b/src/mpi.h index c342ff48..39312fc3 100644 --- a/src/mpi.h +++ b/src/mpi.h @@ -182,14 +182,14 @@ gpg_err_code_t _gcry_mpi_to_octet_string (unsigned char **r_frame, #define mpi_tdiv_q_2exp(a,b,c) _gcry_mpi_tdiv_q_2exp((a),(b),(c)) #define mpi_divisible_ui(a,b) _gcry_mpi_divisible_ui((a),(b)) -ulong _gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend, ulong divisor ); +unsigned long _gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend, unsigned long divisor ); void _gcry_mpi_fdiv_r( gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor ); void _gcry_mpi_fdiv_q( gcry_mpi_t quot, gcry_mpi_t dividend, gcry_mpi_t divisor ); void _gcry_mpi_fdiv_qr( gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor ); void _gcry_mpi_tdiv_r( gcry_mpi_t rem, gcry_mpi_t num, gcry_mpi_t den); void _gcry_mpi_tdiv_qr( gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t num, gcry_mpi_t den); void _gcry_mpi_tdiv_q_2exp( gcry_mpi_t w, gcry_mpi_t u, unsigned count ); -int _gcry_mpi_divisible_ui(gcry_mpi_t dividend, ulong divisor ); +int _gcry_mpi_divisible_ui(gcry_mpi_t dividend, unsigned long divisor ); /*-- mpi-mod.c --*/ diff --git a/src/types.h b/src/types.h index 645ddd62..39393be1 
100644 --- a/src/types.h +++ b/src/types.h @@ -70,12 +70,6 @@ # define HAVE_USHORT_TYPEDEF #endif -#ifndef HAVE_ULONG_TYPEDEF -# undef ulong /* In case there is a macro with that name. */ - typedef unsigned long ulong; -# define HAVE_ULONG_TYPEDEF -#endif - #ifndef HAVE_U16_TYPEDEF # undef u16 /* In case there is a macro with that name. */ # if SIZEOF_UNSIGNED_INT == 2 From 95e9cee802419adf6f4b01b29d7874793004fa8d Mon Sep 17 00:00:00 2001 From: NIIBE Yutaka Date: Tue, 21 Jan 2020 13:16:47 +0900 Subject: [PATCH 11/31] tests: Fix check_pubkey. * tests/basic.c (check_pubkey): Fix constants of pubkeys. Signed-off-by: Tianjia Zhang Co-authored-by: NIIBE Yutaka --- tests/basic.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/basic.c b/tests/basic.c index 8337bcfb..a3454abf 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -12984,7 +12984,7 @@ check_pubkey (void) { static const test_spec_pubkey_t pubkeys[] = { { - GCRY_PK_RSA, FLAG_CRYPT | FLAG_SIGN, + GCRY_PK_RSA, FLAG_CRYPT | FLAG_SIGN | FLAG_GRIP, { "(private-key\n" " (rsa\n" @@ -13022,7 +13022,7 @@ check_pubkey (void) "\xa2\x5d\x3d\x69\xf8\x6d\x37\xa4\xf9\x39"} }, { - GCRY_PK_DSA, FLAG_SIGN, + GCRY_PK_DSA, FLAG_SIGN | FLAG_GRIP, { "(private-key\n" " (DSA\n" @@ -13067,7 +13067,7 @@ check_pubkey (void) "\x4a\xa6\xf9\xeb\x23\xbf\xa9\x12\x2d\x5b" } }, { - GCRY_PK_ELG, FLAG_SIGN | FLAG_CRYPT, + GCRY_PK_ELG, FLAG_SIGN | FLAG_CRYPT | FLAG_GRIP, { "(private-key\n" " (ELG\n" @@ -13246,7 +13246,7 @@ check_pubkey (void) " 4DDFF75C45415C1D9DD9DD33612CD530EFE137C7C90CD4" " 0B0F5621DC3AC1B751CFA0E2634FA0503B3D52639F5D7F" " B72AFD61EA199441D943FFE7F0C70A2759A3CDB84C114E" - " 1F9339FDF27F35ECA93677BEEC#)))\n" + " 1F9339FDF27F35ECA93677BEEC#)))\n", "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" } @@ -13268,7 +13268,7 @@ check_pubkey (void) " (curve secp256k1)\n" " (q #0439A36013301597DAEF41FBE593A02CC513D0B55527EC2D" " 
F1050E2E8FF49C85C23CBE7DED0E7CE6A594896B8F62888F" - " DBC5C8821305E2EA42BF01E37300116281#)))\n" + " DBC5C8821305E2EA42BF01E37300116281#)))\n", "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" } From 8d9958910e54f3fecbab6e133c3971843f6ef310 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 20 Jan 2020 11:35:28 +0800 Subject: [PATCH 12/31] ecc: Simplify signature code * cipher/ecc-gost.c (_gcry_ecc_gost_sign): Use implemented function. * cipher/ecc.c (ecc_verify): Remove redundant code. -- Signed-off-by: Tianjia Zhang --- cipher/ecc-gost.c | 18 ++++-------------- cipher/ecc.c | 22 +--------------------- 2 files changed, 5 insertions(+), 35 deletions(-) diff --git a/cipher/ecc-gost.c b/cipher/ecc-gost.c index e9dfc597..44654a47 100644 --- a/cipher/ecc-gost.c +++ b/cipher/ecc-gost.c @@ -45,8 +45,7 @@ _gcry_ecc_gost_sign (gcry_mpi_t input, mpi_ec_t ec, gcry_mpi_t k, dr, sum, ke, x, e; mpi_point_struct I; gcry_mpi_t hash; - const void *abuf; - unsigned int abits, qbits; + unsigned int qbits; if (DBG_CIPHER) log_mpidump ("gost sign hash ", input ); @@ -54,18 +53,9 @@ _gcry_ecc_gost_sign (gcry_mpi_t input, mpi_ec_t ec, qbits = mpi_get_nbits (ec->n); /* Convert the INPUT into an MPI if needed. 
*/ - if (mpi_is_opaque (input)) - { - abuf = mpi_get_opaque (input, &abits); - rc = _gcry_mpi_scan (&hash, GCRYMPI_FMT_USG, abuf, (abits+7)/8, NULL); - if (rc) - return rc; - if (abits > qbits) - mpi_rshift (hash, hash, abits - qbits); - } - else - hash = input; - + rc = _gcry_dsa_normalize_hash (input, &hash, qbits); + if (rc) + return rc; k = NULL; dr = mpi_alloc (0); diff --git a/cipher/ecc.c b/cipher/ecc.c index 10e11243..1195f5ec 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -813,27 +813,7 @@ ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms) } else { - if (mpi_is_opaque (data)) - { - const void *abuf; - unsigned int abits, qbits; - gcry_mpi_t a; - - qbits = mpi_get_nbits (ec->n); - - abuf = mpi_get_opaque (data, &abits); - rc = _gcry_mpi_scan (&a, GCRYMPI_FMT_USG, abuf, (abits+7)/8, NULL); - if (!rc) - { - if (abits > qbits) - mpi_rshift (a, a, abits - qbits); - - rc = _gcry_ecc_ecdsa_verify (a, ec, sig_r, sig_s); - _gcry_mpi_release (a); - } - } - else - rc = _gcry_ecc_ecdsa_verify (data, ec, sig_r, sig_s); + rc = _gcry_ecc_ecdsa_verify (data, ec, sig_r, sig_s); } leave: From 6b55246c77089dd372eb1807808111660fd789c7 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 20 Jan 2020 11:42:24 +0800 Subject: [PATCH 13/31] Add elliptic curve SM2 implementation. * configure.ac (enabled_pubkey_ciphers): Add ecc-sm2. * cipher/Makefile.am (EXTRA_libcipher_la_SOURCES): Add ecc-sm2.c. * cipher/pubkey-util.c (_gcry_pk_util_parse_flaglist, _gcry_pk_util_preparse_sigval): Add sm2 flags. * cipher/ecc.c: Support ecc-sm2. * cipher/ecc-common.h: Add declarations for ecc-sm2. * cipher/ecc-sm2.c: New. * src/cipher.h: Define PUBKEY_FLAG_SM2. 
-- Signed-off-by: Tianjia Zhang --- cipher/Makefile.am | 2 +- cipher/ecc-common.h | 12 + cipher/ecc-sm2.c | 566 +++++++++++++++++++++++++++++++++++++++++++ cipher/ecc.c | 53 +++- cipher/pubkey-util.c | 7 + configure.ac | 3 +- src/cipher.h | 1 + 7 files changed, 630 insertions(+), 14 deletions(-) create mode 100644 cipher/ecc-sm2.c diff --git a/cipher/Makefile.am b/cipher/Makefile.am index 020a9616..10a5ab62 100644 --- a/cipher/Makefile.am +++ b/cipher/Makefile.am @@ -87,7 +87,7 @@ EXTRA_libcipher_la_SOURCES = \ dsa.c \ elgamal.c \ ecc.c ecc-curves.c ecc-misc.c ecc-common.h \ - ecc-ecdh.c ecc-ecdsa.c ecc-eddsa.c ecc-gost.c \ + ecc-ecdh.c ecc-ecdsa.c ecc-eddsa.c ecc-gost.c ecc-sm2.c \ idea.c \ gost28147.c gost.h \ gostr3411-94.c \ diff --git a/cipher/ecc-common.h b/cipher/ecc-common.h index 7fbc950a..b8b7c763 100644 --- a/cipher/ecc-common.h +++ b/cipher/ecc-common.h @@ -125,4 +125,16 @@ gpg_err_code_t _gcry_ecc_gost_verify (gcry_mpi_t input, mpi_ec_t ec, gcry_mpi_t r, gcry_mpi_t s); +/*-- ecc-sm2.c --*/ +gpg_err_code_t _gcry_ecc_sm2_encrypt (gcry_sexp_t *r_ciph, + gcry_mpi_t input, mpi_ec_t ec); +gpg_err_code_t _gcry_ecc_sm2_decrypt (gcry_sexp_t *r_plain, + gcry_sexp_t data_list, mpi_ec_t ec); +gpg_err_code_t _gcry_ecc_sm2_sign (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s, + int flags, int hashalgo); +gpg_err_code_t _gcry_ecc_sm2_verify (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s); + + #endif /*GCRY_ECC_COMMON_H*/ diff --git a/cipher/ecc-sm2.c b/cipher/ecc-sm2.c new file mode 100644 index 00000000..a6341132 --- /dev/null +++ b/cipher/ecc-sm2.c @@ -0,0 +1,566 @@ +/* ecc-sm2.c - Elliptic Curve SM2 implementation + * Copyright (C) 2020 Tianjia Zhang + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "bithelp.h" +#include "mpi.h" +#include "cipher.h" +#include "context.h" +#include "ec-context.h" +#include "pubkey-internal.h" +#include "ecc-common.h" + +#define MPI_NBYTES(m) ((mpi_get_nbits(m) + 7) / 8) + + +/* Key derivation function from X9.63/SECG */ +static gpg_err_code_t +kdf_x9_63 (int algo, const void *in, size_t inlen, void *out, size_t outlen) +{ + gpg_err_code_t rc; + gcry_md_hd_t hd; + int mdlen; + u32 counter = 1; + u32 counter_be; + unsigned char *dgst; + unsigned char *pout = out; + size_t rlen = outlen; + size_t len; + + rc = _gcry_md_open (&hd, algo, 0); + if (rc) + return rc; + + mdlen = _gcry_md_get_algo_dlen (algo); + + while (rlen > 0) + { + counter_be = be_bswap32 (counter); /* cpu_to_be32 */ + counter++; + + _gcry_md_write (hd, in, inlen); + _gcry_md_write (hd, &counter_be, sizeof(counter_be)); + + dgst = _gcry_md_read (hd, algo); + if (dgst == NULL) + { + rc = GPG_ERR_DIGEST_ALGO; + break; + } + + len = mdlen < rlen ? 
mdlen : rlen; /* min(mdlen, rlen) */ + memcpy (pout, dgst, len); + rlen -= len; + pout += len; + + _gcry_md_reset (hd); + } + + _gcry_md_close (hd); + return rc; +} + + +/* _gcry_ecc_sm2_encrypt description: + * input: + * data[0] : octet string + * output: A new S-expression with the parameters: + * a: c1 : generated ephemeral public key (kG) + * b: c3 : Hash(x2 || IN || y2) + * c: c2 : cipher + * + * sm2_decrypt description: + * in contrast to encrypt + */ +gpg_err_code_t +_gcry_ecc_sm2_encrypt (gcry_sexp_t *r_ciph, gcry_mpi_t input, mpi_ec_t ec) +{ + gpg_err_code_t rc; + const int algo = GCRY_MD_SM3; + gcry_md_hd_t md = NULL; + int mdlen; + unsigned char *dgst; + gcry_mpi_t k = NULL; + mpi_point_struct kG, kP; + gcry_mpi_t x1, y1; + gcry_mpi_t x2, y2; + gcry_mpi_t x2y2 = NULL; + unsigned char *in = NULL; + unsigned int inlen; + unsigned char *raw; + unsigned int rawlen; + unsigned char *cipher = NULL; + int i; + + point_init (&kG); + point_init (&kP); + x1 = mpi_new (0); + y1 = mpi_new (0); + x2 = mpi_new (0); + y2 = mpi_new (0); + + in = _gcry_mpi_get_buffer (input, 0, &inlen, NULL); + if (!in) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + + cipher = xtrymalloc (inlen); + if (!cipher) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + + /* rand k in [1, n-1] */ + k = _gcry_dsa_gen_k (ec->n, GCRY_VERY_STRONG_RANDOM); + + /* [k]G = (x1, y1) */ + _gcry_mpi_ec_mul_point (&kG, k, ec->G, ec); + if (_gcry_mpi_ec_get_affine (x1, y1, &kG, ec)) + { + if (DBG_CIPHER) + log_debug ("Bad check: kG can not be a Point at Infinity!\n"); + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* [k]P = (x2, y2) */ + _gcry_mpi_ec_mul_point (&kP, k, ec->Q, ec); + if (_gcry_mpi_ec_get_affine (x2, y2, &kP, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* t = KDF(x2 || y2, klen) */ + x2y2 = _gcry_mpi_ec_ec2os (&kP, ec); + raw = mpi_get_opaque (x2y2, &rawlen); + rawlen = (rawlen + 7) / 8; + + /* skip the prefix '0x04' */ + raw += 1; + rawlen -= 1; + rc = 
kdf_x9_63 (algo, raw, rawlen, cipher, inlen); + if (rc) + goto leave; + + /* cipher = t xor in */ + for (i = 0; i < inlen; i++) + cipher[i] ^= in[i]; + + /* hash(x2 || IN || y2) */ + mdlen = _gcry_md_get_algo_dlen (algo); + rc = _gcry_md_open (&md, algo, 0); + if (rc) + goto leave; + _gcry_md_write (md, raw, MPI_NBYTES(x2)); + _gcry_md_write (md, in, inlen); + _gcry_md_write (md, raw + MPI_NBYTES(x2), MPI_NBYTES(y2)); + dgst = _gcry_md_read (md, algo); + if (dgst == NULL) + { + rc = GPG_ERR_DIGEST_ALGO; + goto leave; + } + + if (!rc) + { + gcry_mpi_t c1; + gcry_mpi_t c3; + gcry_mpi_t c2; + + c3 = mpi_new (0); + c2 = mpi_new (0); + + c1 = _gcry_ecc_ec2os (x1, y1, ec->p); + _gcry_mpi_set_opaque_copy (c3, dgst, mdlen * 8); + _gcry_mpi_set_opaque_copy (c2, cipher, inlen * 8); + + rc = sexp_build (r_ciph, NULL, + "(enc-val(flags sm2)(sm2(a%M)(b%M)(c%M)))", + c1, c3, c2); + + mpi_free (c1); + mpi_free (c3); + mpi_free (c2); + } + +leave: + _gcry_md_close (md); + mpi_free (x2y2); + mpi_free (k); + + point_free (&kG); + point_free (&kP); + mpi_free (x1); + mpi_free (y1); + mpi_free (x2); + mpi_free (y2); + + xfree (cipher); + xfree (in); + + return rc; +} + + +gpg_err_code_t +_gcry_ecc_sm2_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t data_list, mpi_ec_t ec) +{ + gpg_err_code_t rc; + gcry_mpi_t data_c1 = NULL; + gcry_mpi_t data_c3 = NULL; + gcry_mpi_t data_c2 = NULL; + + /* + * Extract the data. 
+ */ + rc = sexp_extract_param (data_list, NULL, "/a/b/c", + &data_c1, &data_c3, &data_c2, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_printmpi ("ecc_decrypt d_c1", data_c1); + log_printmpi ("ecc_decrypt d_c3", data_c3); + log_printmpi ("ecc_decrypt d_c2", data_c2); + } + + { + const int algo = GCRY_MD_SM3; + gcry_md_hd_t md = NULL; + int mdlen; + unsigned char *dgst; + mpi_point_struct c1; + mpi_point_struct kP; + gcry_mpi_t x2, y2; + gcry_mpi_t x2y2 = NULL; + unsigned char *in = NULL; + unsigned int inlen; + unsigned char *plain = NULL; + unsigned char *raw; + unsigned int rawlen; + unsigned char *c3 = NULL; + unsigned int c3_len; + int i; + + point_init (&c1); + point_init (&kP); + x2 = mpi_new (0); + y2 = mpi_new (0); + + in = mpi_get_opaque (data_c2, &inlen); + inlen = (inlen + 7) / 8; + plain = xtrymalloc (inlen); + if (!plain) + { + rc = gpg_err_code_from_syserror (); + goto leave_main; + } + + rc = _gcry_ecc_os2ec (&c1, data_c1); + if (rc) + goto leave_main; + + if (!_gcry_mpi_ec_curve_point (&c1, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave_main; + } + + /* [d]C1 = (x2, y2), C1 = [k]G */ + _gcry_mpi_ec_mul_point (&kP, ec->d, &c1, ec); + if (_gcry_mpi_ec_get_affine (x2, y2, &kP, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave_main; + } + + /* t = KDF(x2 || y2, inlen) */ + x2y2 = _gcry_mpi_ec_ec2os (&kP, ec); + raw = mpi_get_opaque (x2y2, &rawlen); + rawlen = (rawlen + 7) / 8; + /* skip the prefix '0x04' */ + raw += 1; + rawlen -= 1; + rc = kdf_x9_63 (algo, raw, rawlen, plain, inlen); + if (rc) + goto leave_main; + + /* plain = C2 xor t */ + for (i = 0; i < inlen; i++) + plain[i] ^= in[i]; + + /* Hash(x2 || IN || y2) == C3 */ + mdlen = _gcry_md_get_algo_dlen (algo); + rc = _gcry_md_open (&md, algo, 0); + if (rc) + goto leave_main; + _gcry_md_write (md, raw, MPI_NBYTES(x2)); + _gcry_md_write (md, plain, inlen); + _gcry_md_write (md, raw + MPI_NBYTES(x2), MPI_NBYTES(y2)); + dgst = _gcry_md_read (md, algo); + if (dgst == NULL) + { + memset 
(plain, 0, inlen); + rc = GPG_ERR_DIGEST_ALGO; + goto leave_main; + } + c3 = mpi_get_opaque (data_c3, &c3_len); + c3_len = (c3_len + 7) / 8; + if (c3_len != mdlen || memcmp (dgst, c3, c3_len) != 0) + { + memset (plain, 0, inlen); + rc = GPG_ERR_INV_DATA; + goto leave_main; + } + + if (!rc) + { + gcry_mpi_t r; + + r = mpi_new (inlen * 8); + _gcry_mpi_set_buffer (r, plain, inlen, 0); + + rc = sexp_build (r_plain, NULL, "(value %m)", r); + + mpi_free (r); + } + + leave_main: + _gcry_md_close (md); + mpi_free (x2y2); + xfree (plain); + + point_free (&c1); + point_free (&kP); + mpi_free (x2); + mpi_free (y2); + } + + leave: + _gcry_mpi_release (data_c1); + _gcry_mpi_release (data_c3); + _gcry_mpi_release (data_c2); + + return rc; +} + + +/* Compute an SM2 signature. + * Return the signature struct (r,s) from the message hash. The caller + * must have allocated R and S. + */ +gpg_err_code_t +_gcry_ecc_sm2_sign (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s, + int flags, int hashalgo) +{ + gpg_err_code_t rc = 0; + int extraloops = 0; + gcry_mpi_t hash; + const void *abuf; + unsigned int abits, qbits; + gcry_mpi_t tmp = NULL; + gcry_mpi_t k = NULL; + gcry_mpi_t rk = NULL; + mpi_point_struct kG; + gcry_mpi_t x1; + + if (DBG_CIPHER) + log_mpidump ("sm2 sign hash ", input); + + qbits = mpi_get_nbits (ec->n); + + /* Convert the INPUT into an MPI if needed. */ + rc = _gcry_dsa_normalize_hash (input, &hash, qbits); + if (rc) + return rc; + + point_init (&kG); + x1 = mpi_new (0); + rk = mpi_new (0); + tmp = mpi_new (0); + + for (;;) + { + /* rand k in [1, n-1] */ + if ((flags & PUBKEY_FLAG_RFC6979) && hashalgo) + { + /* Use Pornin's method for deterministic DSA. If this + flag is set, it is expected that HASH is an opaque + MPI with the to be signed hash. That hash is also + used as h1 from 3.2.a. 
*/ + if (!mpi_is_opaque (input)) + { + rc = GPG_ERR_CONFLICT; + goto leave; + } + + abuf = mpi_get_opaque (input, &abits); + rc = _gcry_dsa_gen_rfc6979_k (&k, ec->n, ec->d, + abuf, (abits+7)/8, + hashalgo, extraloops); + if (rc) + goto leave; + extraloops++; + } + else + k = _gcry_dsa_gen_k (ec->n, GCRY_VERY_STRONG_RANDOM); + + _gcry_dsa_modify_k (k, ec->n, qbits); + + /* [k]G = (x1, y1) */ + _gcry_mpi_ec_mul_point (&kG, k, ec->G, ec); + if (_gcry_mpi_ec_get_affine (x1, NULL, &kG, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* r = (e + x1) % n */ + mpi_addm (r, hash, x1, ec->n); + + /* r != 0 && r + k != n */ + if (mpi_cmp_ui (r, 0) == 0) + continue; + mpi_add (rk, r, k); + if (mpi_cmp (rk, ec->n) == 0) + continue; + + /* s = ((d + 1)^-1 * (k - rd)) % n */ + mpi_addm (s, ec->d, GCRYMPI_CONST_ONE, ec->n); + mpi_invm (s, s, ec->n); + mpi_mulm (tmp, r, ec->d, ec->n); + mpi_subm (tmp, k, tmp, ec->n); + mpi_mulm (s, s, tmp, ec->n); + + /* s != 0 */ + if (mpi_cmp_ui (s, 0) == 0) + continue; + + break; /* Okay */ + } + + if (DBG_CIPHER) + { + log_mpidump ("sm2 sign result r ", r); + log_mpidump ("sm2 sign result s ", s); + } + +leave: + point_free (&kG); + mpi_free (k); + mpi_free (x1); + mpi_free (rk); + mpi_free (tmp); + + if (hash != input) + mpi_free (hash); + + return rc; +} + + +/* Verify an SM2 signature. + * Check if R and S verifies INPUT. 
+ */ +gpg_err_code_t +_gcry_ecc_sm2_verify (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s) +{ + gpg_err_code_t err = 0; + gcry_mpi_t hash = NULL; + gcry_mpi_t t = NULL; + mpi_point_struct sG, tP; + gcry_mpi_t x1, y1; + unsigned int nbits; + + /* r, s within [1, n-1] */ + if (mpi_cmp_ui (r, 1) < 0 || mpi_cmp (r, ec->n) > 0) + return GPG_ERR_BAD_SIGNATURE; + if (mpi_cmp_ui (s, 1) < 0 || mpi_cmp (s, ec->n) > 0) + return GPG_ERR_BAD_SIGNATURE; + + nbits = mpi_get_nbits (ec->n); + err = _gcry_dsa_normalize_hash (input, &hash, nbits); + if (err) + return err; + + point_init (&sG); + point_init (&tP); + x1 = mpi_new (0); + y1 = mpi_new (0); + t = mpi_new (0); + + /* t = (r + s) % n, t != 0 */ + mpi_addm (t, r, s, ec->n); + if (mpi_cmp_ui (t, 0) == 0) + { + err = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + + /* sG + tP = (x1, y1) */ + _gcry_mpi_ec_mul_point (&sG, s, ec->G, ec); + _gcry_mpi_ec_mul_point (&tP, t, ec->Q, ec); + _gcry_mpi_ec_add_points (&sG, &sG, &tP, ec); + if (_gcry_mpi_ec_get_affine (x1, y1, &sG, ec)) + { + err = GPG_ERR_INV_DATA; + goto leave; + } + + /* R = (e + x1) % n */ + mpi_addm (t, hash, x1, ec->n); + + /* check R == r */ + if (mpi_cmp (t, r)) + { + if (DBG_CIPHER) + { + log_mpidump (" R", t); + log_mpidump (" r", r); + log_mpidump (" s", s); + } + err = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + if (DBG_CIPHER) + log_debug ("sm2 verify: Accepted\n"); + + leave: + point_free (&sG); + point_free (&tP); + mpi_free (x1); + mpi_free (y1); + mpi_free (t); + if (hash != input) + mpi_free (hash); + + return err; +} diff --git a/cipher/ecc.c b/cipher/ecc.c index 1195f5ec..49c2c0f6 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -69,6 +69,7 @@ static const char *ecc_names[] = "ecdh", "eddsa", "gost", + "sm2", NULL, }; @@ -723,6 +724,14 @@ ecc_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms) rc = sexp_build (r_sig, NULL, "(sig-val(gost(r%M)(s%M)))", sig_r, sig_s); } + else if ((ctx.flags & PUBKEY_FLAG_SM2)) + { + rc = 
_gcry_ecc_sm2_sign (data, ec, sig_r, sig_s, + ctx.flags, ctx.hash_algo); + if (!rc) + rc = sexp_build (r_sig, NULL, + "(sig-val(sm2(r%M)(s%M)))", sig_r, sig_s); + } else { rc = _gcry_ecc_ecdsa_sign (data, ec, sig_r, sig_s, @@ -811,6 +820,10 @@ ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms) { rc = _gcry_ecc_gost_verify (data, ec, sig_r, sig_s); } + else if ((sigflags & PUBKEY_FLAG_SM2)) + { + rc = _gcry_ecc_sm2_verify (data, ec, sig_r, sig_s); + } else { rc = _gcry_ecc_ecdsa_verify (data, ec, sig_r, sig_s); @@ -918,6 +931,13 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) goto leave; } + if ((ctx.flags & PUBKEY_FLAG_SM2)) + { + /* All encryption will be done, return it. */ + rc = _gcry_ecc_sm2_encrypt (r_ciph, data, ec); + goto leave; + } + /* The following is false: assert( mpi_cmp_ui( R.x, 1 )==0 );, so */ { mpi_point_struct R; /* Result that we return. */ @@ -1041,18 +1061,6 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_DECRYPT, (nbits = ecc_get_nbits (keyparms))); - /* - * Extract the data. - */ - rc = _gcry_pk_util_preparse_encval (s_data, ecc_names, &l1, &ctx); - if (rc) - goto leave; - rc = sexp_extract_param (l1, NULL, "/e", &data_e, NULL); - if (rc) - goto leave; - if (DBG_CIPHER) - log_printmpi ("ecc_decrypt d_e", data_e); - /* * Extract the key. */ @@ -1066,6 +1074,27 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) goto leave; } + /* + * Extract the data. + */ + rc = _gcry_pk_util_preparse_encval (s_data, ecc_names, &l1, &ctx); + if (rc) + goto leave; + if ((ctx.flags & PUBKEY_FLAG_SM2)) + { + /* All decryption will be done, return it. 
*/ + rc = _gcry_ecc_sm2_decrypt (r_plain, l1, ec); + goto leave; + } + else + { + rc = sexp_extract_param (l1, NULL, "/e", &data_e, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + log_printmpi ("ecc_decrypt d_e", data_e); + } + if (ec->dialect == ECC_DIALECT_SAFECURVE || (flags & PUBKEY_FLAG_DJB_TWEAK)) enable_specific_point_validation = 1; else diff --git a/cipher/pubkey-util.c b/cipher/pubkey-util.c index 4a6bf462..c52185de 100644 --- a/cipher/pubkey-util.c +++ b/cipher/pubkey-util.c @@ -81,6 +81,11 @@ _gcry_pk_util_parse_flaglist (gcry_sexp_t list, encoding = PUBKEY_ENC_RAW; flags |= PUBKEY_FLAG_RAW_FLAG; /* Explicitly given. */ } + else if (!memcmp (s, "sm2", 3)) + { + encoding = PUBKEY_ENC_RAW; + flags |= PUBKEY_FLAG_SM2 | PUBKEY_FLAG_RAW_FLAG; + } else if (!igninvflag) rc = GPG_ERR_INV_FLAG; break; @@ -429,6 +434,8 @@ _gcry_pk_util_preparse_sigval (gcry_sexp_t s_sig, const char **algo_names, *r_eccflags = PUBKEY_FLAG_EDDSA; if (!strcmp (name, "gost")) *r_eccflags = PUBKEY_FLAG_GOST; + if (!strcmp (name, "sm2")) + *r_eccflags = PUBKEY_FLAG_SM2; } *r_parms = l2; diff --git a/configure.ac b/configure.ac index 57884162..74ca9be0 100644 --- a/configure.ac +++ b/configure.ac @@ -2545,7 +2545,8 @@ LIST_MEMBER(ecc, $enabled_pubkey_ciphers) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS \ ecc.lo ecc-curves.lo ecc-misc.lo \ - ecc-ecdh.lo ecc-ecdsa.lo ecc-eddsa.lo ecc-gost.lo" + ecc-ecdh.lo ecc-ecdsa.lo ecc-eddsa.lo ecc-gost.lo \ + ecc-sm2.lo" AC_DEFINE(USE_ECC, 1, [Defined if this module should be included]) fi diff --git a/src/cipher.h b/src/cipher.h index 5aac19f1..1fe50890 100644 --- a/src/cipher.h +++ b/src/cipher.h @@ -42,6 +42,7 @@ #define PUBKEY_FLAG_GOST (1 << 13) #define PUBKEY_FLAG_NO_KEYTEST (1 << 14) #define PUBKEY_FLAG_DJB_TWEAK (1 << 15) +#define PUBKEY_FLAG_SM2 (1 << 16) enum pk_operation From aa9c78afa1d867bb7b9b3c695cf31a832c9419e5 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 20 Jan 2020 11:42:25 +0800 
Subject: [PATCH 14/31] tests: Add basic test cases for sm2 * tests/basic.c (check_pubkey): Add test cases for ecc-sm2. -- Original change was modified by gniibe to limit only for ECDSA. Signed-off-by: Tianjia Zhang --- tests/basic.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/basic.c b/tests/basic.c index a3454abf..beb5a5b2 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -12594,6 +12594,16 @@ check_pubkey_sign_ecdsa (int n, gcry_sexp_t skey, gcry_sexp_t pkey) /* */ "000102030405060708090A0B0C0D0E0F#))", 0 }, + { 256, + "(data (flags sm2)\n" + " (hash sm3 #112233445566778899AABBCCDDEEFF00" + /* */ "123456789ABCDEF0123456789ABCDEF0#))", + 0, + "(data (flags sm2)\n" + " (hash sm3 #B524F552CD82B8B028476E005C377FB1" + /* */ "9A87E6FC682D48BB5D42E3D9B9EFFE76#))", + 0 + }, { 0, NULL } }; @@ -13270,6 +13280,29 @@ check_pubkey (void) " F1050E2E8FF49C85C23CBE7DED0E7CE6A594896B8F62888F" " DBC5C8821305E2EA42BF01E37300116281#)))\n", + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" } + }, + { /* sm2 test */ + GCRY_PK_ECDSA, FLAG_SIGN, + { + "(private-key\n" + " (ecc\n" + " (curve sm2p256v1)\n" + " (q #04" + " 8759389A34AAAD07ECF4E0C8C2650A4459C8D926EE2378324E0261C52538CB47" + " 7528106B1E0B7C8DD5FF29A9C86A89065656EB33154BC0556091EF8AC9D17D78#)" + " (d #41EBDBA9C98CBECCE7249CF18BFD427FF8EA0B2FAB7B9D305D9D9BF4DB6ADFC2#)" + "))", + + "(public-key\n" + " (ecc\n" + " (curve sm2p256v1)\n" + " (q #04" + " 8759389A34AAAD07ECF4E0C8C2650A4459C8D926EE2378324E0261C52538CB47" + " 7528106B1E0B7C8DD5FF29A9C86A89065656EB33154BC0556091EF8AC9D17D78#)" + "))", + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" } } From 79ed620ec46adbb08f5cea6a4865a95a436e4109 Mon Sep 17 00:00:00 2001 From: Marvin W Date: Wed, 22 Jan 2020 19:36:13 +0200 Subject: [PATCH 15/31] Set vZZ.16b register to zero before use in armv8 gcm implementation * cipher/cipher-gcm-armv8-aarch64-ce.S 
(_gcry_ghash_setup_armv8_ce_pmull): Set vZZ to zero. -- Reported by "Marvin W." at https://dev.gnupg.org/D497: > > The register vZZ.16b is expected to be always 0 throughout the macros > in cipher/cipher-gcm-armv8-aarch64-ce.S. The PMUL_128x128 and REDUCTION > macros are used in gcry_ghash_setup_armv8_ce_pmull function, however that > function does not set vZZ.16b to zero. If previous use left `vZZ.16b > non-zero before gcry_ghash_setup_armv8_ce_pmull is called, this will cause > invalid GCM auth tag results. > > The patch resets vZZ.16b to 0 at the beginning of > gcry_ghash_setup_armv8_ce_pmull. > [jk: from differential web-ui to commit] Signed-off-by: Jussi Kivilinna --- cipher/cipher-gcm-armv8-aarch64-ce.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cipher/cipher-gcm-armv8-aarch64-ce.S b/cipher/cipher-gcm-armv8-aarch64-ce.S index b0c2cccc..877207d3 100644 --- a/cipher/cipher-gcm-armv8-aarch64-ce.S +++ b/cipher/cipher-gcm-armv8-aarch64-ce.S @@ -385,6 +385,8 @@ _gcry_ghash_setup_armv8_ce_pmull: GET_DATA_POINTER(x2, .Lrconst) + eor vZZ.16b, vZZ.16b, vZZ.16b + /* H¹ */ ld1 {rh1.16b}, [x0] rbit rh1.16b, rh1.16b From 4aa8ff904262f331abbb8c988069a7029ca13502 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Wed, 22 Jan 2020 19:36:13 +0200 Subject: [PATCH 16/31] tests/basic: add vector cluttering to detect implementation bugs * src/global.c (_gcry_check_version): Fix missing newline. * tests/basic.c (ALWAYS_INLINE, CLUTTER_VECTOR_REGISTER_*, prepare_vector_data) (clutter_vector_registers): New. (progress_handler): Make static function. (check_bulk_cipher_modes, check_one_cipher_core_reset) (check_one_cipher_core, check_one_md, check_one_md_multi) (check_one_md_final, check_one_mac): Clutter vector registers before gcry_* calls to cipher/md/mac algorithms.
-- Signed-off-by: Jussi Kivilinna --- src/global.c | 3 +- tests/basic.c | 290 +++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 290 insertions(+), 3 deletions(-) diff --git a/src/global.c b/src/global.c index d82c680a..be65df54 100644 --- a/src/global.c +++ b/src/global.c @@ -261,7 +261,8 @@ _gcry_check_version (const char *req_version) /* Compare version numbers. */ if ( my_major > rq_major || (my_major == rq_major && my_minor > rq_minor) - || (my_major == rq_major && my_minor == rq_minor && my_micro > rq_micro) + || (my_major == rq_major && my_minor == rq_minor + && my_micro > rq_micro) || (my_major == rq_major && my_minor == rq_minor && my_micro == rq_micro)) { diff --git a/tests/basic.c b/tests/basic.c index beb5a5b2..812bd89d 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -33,6 +33,10 @@ #define PGM "basic" #include "t-common.h" +#if __GNUC__ >= 4 +# define ALWAYS_INLINE __attribute__((always_inline)) +#endif + typedef struct test_spec_pubkey_key { const char *secret; @@ -191,7 +195,7 @@ show_mac_not_available (int algo) -void +static void progress_handler (void *cb_data, const char *what, int printchar, int current, int total) { @@ -207,6 +211,239 @@ progress_handler (void *cb_data, const char *what, int printchar, fflush (stdout); } + +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define CLUTTER_VECTOR_REGISTER_AMD64 1 +# define CLUTTER_VECTOR_REGISTER_COUNT 16 +#elif defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ >= 4 && \ + defined(HAVE_GCC_INLINE_ASM_SSSE3) +# define CLUTTER_VECTOR_REGISTER_I386 1 +# define CLUTTER_VECTOR_REGISTER_COUNT 8 +#elif defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON) && \ + (defined(__ARM_FEATURE_SIMD32) || defined(__ARM_NEON)) +# define CLUTTER_VECTOR_REGISTER_AARCH64 1 +# define CLUTTER_VECTOR_REGISTER_COUNT 32 +#elif 
defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_NEON) && \ + (defined(__ARM_FEATURE_SIMD32) || defined(__ARM_NEON)) +# define CLUTTER_VECTOR_REGISTER_NEON 1 +# define CLUTTER_VECTOR_REGISTER_COUNT 16 +#endif + + +#ifdef CLUTTER_VECTOR_REGISTER_COUNT +static void +prepare_vector_data(unsigned char data[CLUTTER_VECTOR_REGISTER_COUNT][16]) +{ + static unsigned char basedata[16] = + { + 0xd7, 0xfe, 0x5c, 0x4b, 0x58, 0xfe, 0xf4, 0xb6, + 0xed, 0x2f, 0x31, 0xc9, 0x1d, 0xd3, 0x62, 0x8d + }; + int j, i; + + for (i = 0; i < CLUTTER_VECTOR_REGISTER_COUNT; i++) + { + for (j = 0; j < 16; j++) + { + data[i][j] = basedata[(i + j) % 16]; + } + + for (j = 0; j < 16; j++) + { + basedata[j] -= j; + } + } +} +#endif + + +static inline ALWAYS_INLINE void +clutter_vector_registers(void) +{ +#ifdef CLUTTER_VECTOR_REGISTER_COUNT + unsigned char data[CLUTTER_VECTOR_REGISTER_COUNT][16]; +#if defined(CLUTTER_VECTOR_REGISTER_AARCH64) || \ + defined(CLUTTER_VECTOR_REGISTER_NEON) + static int init; + static int have_neon; + + if (!init) + { + char *string; + + string = gcry_get_config (0, "hwflist"); + if (string) + { + have_neon = (strstr(string, "arm-neon:") != NULL); + xfree(string); + } + init = 1; + } + + if (!have_neon) + return; +#elif defined(CLUTTER_VECTOR_REGISTER_I386) + static int init; + static int have_ssse3; + + if (!init) + { + char *string; + + string = gcry_get_config (0, "hwflist"); + if (string) + { + have_ssse3 = (strstr(string, "intel-ssse3:") != NULL); + xfree(string); + } + init = 1; + } + + if (!have_ssse3) + return; +#endif + + prepare_vector_data(data); + +#if defined(CLUTTER_VECTOR_REGISTER_AMD64) + asm volatile("movdqu %[data0], %%xmm0\n" + "movdqu %[data1], %%xmm1\n" + "movdqu %[data2], %%xmm2\n" + "movdqu %[data3], %%xmm3\n" + "movdqu %[data4], %%xmm4\n" + "movdqu %[data5], %%xmm5\n" + "movdqu %[data6], %%xmm6\n" + "movdqu %[data7], %%xmm7\n" + "movdqu %[data8], %%xmm8\n" + "movdqu %[data9], %%xmm9\n" + "movdqu %[data10], %%xmm10\n" 
+ "movdqu %[data11], %%xmm11\n" + "movdqu %[data12], %%xmm12\n" + "movdqu %[data13], %%xmm13\n" + "movdqu %[data14], %%xmm14\n" + "movdqu %[data15], %%xmm15\n" + : + : [data0] "m" (*data[0]), + [data1] "m" (*data[1]), + [data2] "m" (*data[2]), + [data3] "m" (*data[3]), + [data4] "m" (*data[4]), + [data5] "m" (*data[5]), + [data6] "m" (*data[6]), + [data7] "m" (*data[7]), + [data8] "m" (*data[8]), + [data9] "m" (*data[9]), + [data10] "m" (*data[10]), + [data11] "m" (*data[11]), + [data12] "m" (*data[12]), + [data13] "m" (*data[13]), + [data14] "m" (*data[14]), + [data15] "m" (*data[15]) + : "memory" +#ifdef __SSE2__ + ,"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", + "xmm15" +#endif + ); +#elif defined(CLUTTER_VECTOR_REGISTER_I386) + asm volatile("movdqu %[data0], %%xmm0\n" + "movdqu %[data1], %%xmm1\n" + "movdqu %[data2], %%xmm2\n" + "movdqu %[data3], %%xmm3\n" + "movdqu %[data4], %%xmm4\n" + "movdqu %[data5], %%xmm5\n" + "movdqu %[data6], %%xmm6\n" + "movdqu %[data7], %%xmm7\n" + : + : [data0] "m" (*data[0]), + [data1] "m" (*data[1]), + [data2] "m" (*data[2]), + [data3] "m" (*data[3]), + [data4] "m" (*data[4]), + [data5] "m" (*data[5]), + [data6] "m" (*data[6]), + [data7] "m" (*data[7]) + : "memory" +#ifdef __SSE2__ + ,"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" +#endif + ); +#elif defined(CLUTTER_VECTOR_REGISTER_AARCH64) + asm volatile("mov x0, %[ptr]\n" + "ld1 {v0.16b}, [x0], #16\n" + "ld1 {v1.16b}, [x0], #16\n" + "ld1 {v2.16b}, [x0], #16\n" + "ld1 {v3.16b}, [x0], #16\n" + "ld1 {v4.16b}, [x0], #16\n" + "ld1 {v5.16b}, [x0], #16\n" + "ld1 {v6.16b}, [x0], #16\n" + "ld1 {v7.16b}, [x0], #16\n" + "ld1 {v8.16b}, [x0], #16\n" + "ld1 {v9.16b}, [x0], #16\n" + "ld1 {v10.16b}, [x0], #16\n" + "ld1 {v11.16b}, [x0], #16\n" + "ld1 {v12.16b}, [x0], #16\n" + "ld1 {v13.16b}, [x0], #16\n" + "ld1 {v14.16b}, [x0], #16\n" + "ld1 {v15.16b}, [x0], #16\n" + "ld1 {v16.16b}, [x0], #16\n" + 
"ld1 {v17.16b}, [x0], #16\n" + "ld1 {v18.16b}, [x0], #16\n" + "ld1 {v19.16b}, [x0], #16\n" + "ld1 {v20.16b}, [x0], #16\n" + "ld1 {v21.16b}, [x0], #16\n" + "ld1 {v22.16b}, [x0], #16\n" + "ld1 {v23.16b}, [x0], #16\n" + "ld1 {v24.16b}, [x0], #16\n" + "ld1 {v25.16b}, [x0], #16\n" + "ld1 {v26.16b}, [x0], #16\n" + "ld1 {v27.16b}, [x0], #16\n" + "ld1 {v28.16b}, [x0], #16\n" + "ld1 {v29.16b}, [x0], #16\n" + "ld1 {v30.16b}, [x0], #16\n" + "ld1 {v31.16b}, [x0], #16\n" + : + : [ptr] "r" (data) + : "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", + "memory"); +#elif defined(CLUTTER_VECTOR_REGISTER_NEON) + asm volatile("mov r0, %[ptr]\n" + "vld1.64 {q0}, [r0]!\n" + "vld1.64 {q1}, [r0]!\n" + "vld1.64 {q2}, [r0]!\n" + "vld1.64 {q3}, [r0]!\n" + "vld1.64 {q4}, [r0]!\n" + "vld1.64 {q5}, [r0]!\n" + "vld1.64 {q6}, [r0]!\n" + "vld1.64 {q7}, [r0]!\n" + "vld1.64 {q8}, [r0]!\n" + "vld1.64 {q9}, [r0]!\n" + "vld1.64 {q10}, [r0]!\n" + "vld1.64 {q11}, [r0]!\n" + "vld1.64 {q12}, [r0]!\n" + "vld1.64 {q13}, [r0]!\n" + "vld1.64 {q14}, [r0]!\n" + "vld1.64 {q15}, [r0]!\n" + : + : [ptr] "r" (data) + : "r0", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", + "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", + "memory"); +#endif + +#endif /* CLUTTER_VECTOR_REGISTER_COUNT */ +} + + + static void check_cbc_mac_cipher (void) { @@ -8280,7 +8517,9 @@ check_bulk_cipher_modes (void) goto leave; } + clutter_vector_registers(); err = gcry_cipher_setkey (hde, tv[i].key, tv[i].keylen); + clutter_vector_registers(); if (!err) err = gcry_cipher_setkey (hdd, tv[i].key, tv[i].keylen); if (err) @@ -8296,7 +8535,9 @@ check_bulk_cipher_modes (void) goto leave; } + clutter_vector_registers(); err = gcry_cipher_setiv (hde, tv[i].iv, tv[i].ivlen); + clutter_vector_registers(); if (!err) err = gcry_cipher_setiv (hdd, tv[i].iv, tv[i].ivlen); if 
(err) @@ -8309,6 +8550,7 @@ check_bulk_cipher_modes (void) for (j=0; j < buflen; j++) buffer[j] = ((j & 0xff) ^ ((j >> 8) & 0xff)); + clutter_vector_registers(); err = gcry_cipher_encrypt (hde, outbuf, buflen, buffer, buflen); if (err) { @@ -8330,6 +8572,7 @@ check_bulk_cipher_modes (void) fail ("encrypt mismatch (algo %d, mode %d)\n", tv[i].algo, tv[i].mode); + clutter_vector_registers(); err = gcry_cipher_decrypt (hdd, outbuf, buflen, NULL, 0); if (err) { @@ -8409,6 +8652,7 @@ check_one_cipher_core_reset (gcry_cipher_hd_t hd, int algo, int mode, int pass, if (mode == GCRY_CIPHER_MODE_OCB || mode == GCRY_CIPHER_MODE_CCM) { + clutter_vector_registers(); err = gcry_cipher_setiv (hd, iv, sizeof(iv)); if (err) { @@ -8535,6 +8779,7 @@ check_one_cipher_core (int algo, int mode, int flags, goto err_out_free; } + clutter_vector_registers(); err = gcry_cipher_setkey (hd, key, keylen); if (err) { @@ -8547,6 +8792,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (check_one_cipher_core_reset (hd, algo, mode, pass, nplain) < 0) goto err_out_free; + clutter_vector_registers(); err = gcry_cipher_encrypt (hd, out, nplain, plain, nplain); if (err) { @@ -8558,6 +8804,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (taglen > 0) { + clutter_vector_registers(); err = gcry_cipher_gettag (hd, tag, taglen); if (err) { @@ -8575,6 +8822,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (check_one_cipher_core_reset (hd, algo, mode, pass, nplain) < 0) goto err_out_free; + clutter_vector_registers(); err = gcry_cipher_decrypt (hd, in, nplain, out, nplain); if (err) { @@ -8586,6 +8834,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (taglen > 0) { + clutter_vector_registers(); err = gcry_cipher_checktag (hd, tag_result, taglen); if (err) { @@ -8605,6 +8854,7 @@ check_one_cipher_core (int algo, int mode, int flags, goto err_out_free; memcpy (out, plain, nplain); + clutter_vector_registers(); err = gcry_cipher_encrypt (hd, out, nplain, NULL, 
0); if (err) { @@ -8639,6 +8889,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (check_one_cipher_core_reset (hd, algo, mode, pass, nplain) < 0) goto err_out_free; + clutter_vector_registers(); err = gcry_cipher_decrypt (hd, out, nplain, NULL, 0); if (err) { @@ -8651,6 +8902,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (taglen > 0) { + clutter_vector_registers(); err = gcry_cipher_checktag (hd, tag_result, taglen); if (err) { @@ -8677,6 +8929,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (piecelen > nplain - pos) piecelen = nplain - pos; + clutter_vector_registers(); err = gcry_cipher_encrypt (hd, out + pos, piecelen, plain + pos, piecelen); if (err) @@ -8694,6 +8947,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (taglen > 0) { + clutter_vector_registers(); err = gcry_cipher_gettag (hd, tag, taglen); if (err) { @@ -8723,6 +8977,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (piecelen > nplain - pos) piecelen = nplain - pos; + clutter_vector_registers(); err = gcry_cipher_decrypt (hd, in + pos, piecelen, out + pos, piecelen); if (err) { @@ -8739,6 +8994,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (taglen > 0) { + clutter_vector_registers(); err = gcry_cipher_checktag (hd, tag_result, taglen); if (err) { @@ -8767,6 +9023,7 @@ check_one_cipher_core (int algo, int mode, int flags, piecelen = nplain - pos; memcpy (out + pos, plain + pos, piecelen); + clutter_vector_registers(); err = gcry_cipher_encrypt (hd, out + pos, piecelen, NULL, 0); if (err) { @@ -8795,6 +9052,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (piecelen > nplain - pos) piecelen = nplain - pos; + clutter_vector_registers(); err = gcry_cipher_decrypt (hd, out + pos, piecelen, NULL, 0); if (err) { @@ -9104,6 +9362,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, if (key && klen) { + clutter_vector_registers(); err = gcry_md_setkey (hd, key, klen); if (err) { 
@@ -9131,6 +9390,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, if (key && klen) { + clutter_vector_registers(); err = gcry_md_setkey (hd2, key, klen); if (err) { @@ -9149,10 +9409,12 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, gcry_md_reset (hd); gcry_md_reset (hd2); + clutter_vector_registers(); gcry_md_write (hd, buf, i); for (j = 0; j < i; j++) gcry_md_write (hd2, &buf[j], 1); + clutter_vector_registers(); p1 = gcry_md_read (hd, algo); p2 = gcry_md_read (hd2, algo); if (memcmp (p1, p2, mdlen)) @@ -9196,6 +9458,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, if (*data == '?') fillbuf_count(aaa, piecelen, 1000 * 1000 - left); + clutter_vector_registers(); gcry_md_write (hd, aaa, piecelen); left -= piecelen; @@ -9212,6 +9475,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, if (*data == '?') fillbuf_count(aaa, piecelen, 1000 * 1000 - left); + clutter_vector_registers(); gcry_md_write (hd, aaa, piecelen); left -= piecelen; @@ -9223,8 +9487,12 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, } } else - gcry_md_write (hd, data, len); + { + clutter_vector_registers(); + gcry_md_write (hd, data, len); + } + clutter_vector_registers(); err = gcry_md_copy (&hd2, hd); if (err) { @@ -9235,6 +9503,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, if (!xof) { + clutter_vector_registers(); p = gcry_md_read (hd2, algo); if (memcmp (p, expect, mdlen)) @@ -9255,12 +9524,14 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, char buf[1000]; int outmax = sizeof(buf) > elen ? 
elen : sizeof(buf); + clutter_vector_registers(); err = gcry_md_copy (&hd, hd2); if (err) { fail ("algo %d, gcry_md_copy failed: %s\n", algo, gpg_strerror (err)); } + clutter_vector_registers(); err = gcry_md_extract(hd2, algo, buf, outmax); if (err) { @@ -9283,6 +9554,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, memset(buf, 0, sizeof(buf)); /* Extract one byte at time. */ + clutter_vector_registers(); for (i = 0; i < outmax && !err; i++) err = gcry_md_extract(hd, algo, &buf[i], 1); if (err) @@ -9334,6 +9606,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, /* Extract large chucks, total 1000000 additional bytes. */ for (i = 0; i < 1000; i++) { + clutter_vector_registers(); err = gcry_md_extract(hd, algo, buf, 1000); if (!err) gcry_md_write(crc1, buf, 1000); @@ -9356,6 +9629,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, if (piecelen > left) piecelen = left; + clutter_vector_registers(); err = gcry_md_extract (hd2, algo, buf, piecelen); if (!err) gcry_md_write(crc2, buf, piecelen); @@ -9373,7 +9647,9 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, piecelen = piecelen * 2 - ((piecelen != startlen) ? 
startlen : 0); } + clutter_vector_registers(); p1 = gcry_md_read (crc1, crcalgo); + clutter_vector_registers(); p2 = gcry_md_read (crc2, crcalgo); if (memcmp (p1, p2, crclen)) @@ -9449,6 +9725,7 @@ check_one_md_multi (int algo, const char *data, int len, const char *expect) iovcnt++; assert (iovcnt <= DIM (iov)); + clutter_vector_registers(); err = gcry_md_hash_buffers (algo, 0, digest, iov, iovcnt); if (err) { @@ -9498,6 +9775,7 @@ check_one_md_final(int algo, const char *expect, unsigned int expectlen) for (i = 0; i < sizeof(inbuf); i++) inbuf[i] = i; + clutter_vector_registers(); gcry_md_hash_buffer (algo, xorbuf, NULL, 0); for (i = 1; i < sizeof(inbuf); i++) { @@ -11336,6 +11614,7 @@ check_one_mac (int algo, const char *data, int datalen, return; } + clutter_vector_registers(); err = gcry_mac_setkey (hd, key, keylen); if (err) fail("algo %d, mac gcry_mac_setkey failed: %s\n", algo, gpg_strerror (err)); @@ -11344,6 +11623,7 @@ check_one_mac (int algo, const char *data, int datalen, if (ivlen && iv) { + clutter_vector_registers(); err = gcry_mac_setiv (hd, iv, ivlen); if (err) fail("algo %d, mac gcry_mac_ivkey failed: %s\n", algo, @@ -11356,6 +11636,7 @@ check_one_mac (int algo, const char *data, int datalen, { for (i = 0; i < datalen; i++) { + clutter_vector_registers(); err = gcry_mac_write (hd, &data[i], 1); if (err) fail("algo %d, mac gcry_mac_write [buf-offset: %d] failed: %s\n", @@ -11389,6 +11670,7 @@ check_one_mac (int algo, const char *data, int datalen, if (*data == '?') fillbuf_count(aaa, piecelen, 1000 * 1000 - left); + clutter_vector_registers(); gcry_mac_write (hd, aaa, piecelen); left -= piecelen; @@ -11405,6 +11687,7 @@ check_one_mac (int algo, const char *data, int datalen, if (*data == '?') fillbuf_count(aaa, piecelen, 1000 * 1000 - left); + clutter_vector_registers(); gcry_mac_write (hd, aaa, piecelen); left -= piecelen; @@ -11417,6 +11700,7 @@ check_one_mac (int algo, const char *data, int datalen, } else { + clutter_vector_registers(); err = 
gcry_mac_write (hd, data, datalen); } @@ -11426,11 +11710,13 @@ check_one_mac (int algo, const char *data, int datalen, goto out; } + clutter_vector_registers(); err = gcry_mac_verify (hd, expect, maclen); if (err) fail("algo %d, mac gcry_mac_verify failed: %s\n", algo, gpg_strerror (err)); macoutlen = maclen; + clutter_vector_registers(); err = gcry_mac_read (hd, p, &macoutlen); if (err) fail("algo %d, mac gcry_mac_read failed: %s\n", algo, gpg_strerror (err)); From 8ebbd8545a209e78ea4fc0cea9dc2e4a3acd9be2 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Wed, 22 Jan 2020 19:39:41 +0200 Subject: [PATCH 17/31] Register DCO for H.J. Lu -- Signed-off-by: Jussi Kivilinna --- AUTHORS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/AUTHORS b/AUTHORS index 8271c0d8..b72992f8 100644 --- a/AUTHORS +++ b/AUTHORS @@ -157,6 +157,9 @@ Dmitry Eremin-Solenikov Dmitry Kasatkin 2012-12-14:50CAE2DB.80302@intel.com: +H.J. Lu +2020-01-19:20200119135241.GA4970@gmail.com: + Jia Zhang 2017-10-17:59E56E30.9060503@alibaba-inc.com: From 4c88c2bd2a418435506325cd53246acaaa52750c Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Sun, 19 Jan 2020 06:40:22 -0800 Subject: [PATCH 18/31] x86: Add .note.gnu.property section for Intel CET * configure.ac: Include in for assembly codes. -- When Intel CET is enabled, include in for assembly codes to mark Intel CET support. Signed-off-by: H.J. Lu --- configure.ac | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/configure.ac b/configure.ac index 74ca9be0..f31b7558 100644 --- a/configure.ac +++ b/configure.ac @@ -97,6 +97,12 @@ AH_TOP([ AH_BOTTOM([ #define _GCRYPT_IN_LIBGCRYPT 1 +/* Add .note.gnu.property section for Intel CET in assembler sources + when CET is enabled. */ +#if defined(__ASSEMBLER__) && defined(__CET__) +# include +#endif + /* If the configure check for endianness has been disabled, get it from OS macros. This is intended for making fat binary builds on OS X. 
*/ #ifdef DISABLED_ENDIAN_CHECK From 24b4d5c10a97aaf82ac7402cc3a5b429d580cd66 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Sun, 19 Jan 2020 06:40:23 -0800 Subject: [PATCH 19/31] mpi: Add .note.gnu.property section for Intel CET * mpi/config.links: Include in . -- When Intel CET is enabled, include in for assembly codes to mark Intel CET support. Signed-off-by: H.J. Lu --- mpi/config.links | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mpi/config.links b/mpi/config.links index 3ead4f08..4f43b732 100644 --- a/mpi/config.links +++ b/mpi/config.links @@ -382,6 +382,16 @@ if test x"$mpi_cpu_arch" = x ; then mpi_cpu_arch="unknown" fi +# Add .note.gnu.property section for Intel CET in assembler sources +# when CET is enabled. */ +if test x"$mpi_cpu_arch" = xx86 ; then + cat <> ./mpi/asm-syntax.h + +#if defined(__ASSEMBLER__) && defined(__CET__) +# include +#endif +EOF +fi # Make sysdep.h echo '/* created by config.links - do not edit */' >./mpi/sysdep.h From 22e577071790834f07753c42a191a568c9f2644d Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Sun, 19 Jan 2020 06:40:24 -0800 Subject: [PATCH 20/31] amd64: Always include in cipher assembly codes * cipher/camellia-aesni-avx-amd64.S: Always include . * cipher/camellia-aesni-avx2-amd64.S: Likewise. * cipher/serpent-avx2-amd64.S: Likewise. -- When Intel CET is enabled, we need to include in assembly codes to mark Intel CET support even if it is empty. We should always include in cipher amd64 assembly codes so that they will be marked for Intel CET support when compiling for i686. Signed-off-by: H.J. 
Lu --- cipher/camellia-aesni-avx-amd64.S | 3 ++- cipher/camellia-aesni-avx2-amd64.S | 3 ++- cipher/serpent-avx2-amd64.S | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/cipher/camellia-aesni-avx-amd64.S b/cipher/camellia-aesni-avx-amd64.S index e16d4f61..4671bcfe 100644 --- a/cipher/camellia-aesni-avx-amd64.S +++ b/cipher/camellia-aesni-avx-amd64.S @@ -18,8 +18,9 @@ * License along with this program; if not, see . */ -#ifdef __x86_64 #include + +#ifdef __x86_64 #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT) diff --git a/cipher/camellia-aesni-avx2-amd64.S b/cipher/camellia-aesni-avx2-amd64.S index cc01c774..517e6880 100644 --- a/cipher/camellia-aesni-avx2-amd64.S +++ b/cipher/camellia-aesni-avx2-amd64.S @@ -18,8 +18,9 @@ * License along with this program; if not, see . */ -#ifdef __x86_64 #include + +#ifdef __x86_64 #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) diff --git a/cipher/serpent-avx2-amd64.S b/cipher/serpent-avx2-amd64.S index 9b17c2bd..dcee9b62 100644 --- a/cipher/serpent-avx2-amd64.S +++ b/cipher/serpent-avx2-amd64.S @@ -18,8 +18,9 @@ * License along with this program; if not, see . */ -#ifdef __x86_64 #include + +#ifdef __x86_64 #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT) && \ defined(ENABLE_AVX2_SUPPORT) From cb9f0a2df8225eed071ae0a56265e38e9f6ff184 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Sun, 19 Jan 2020 06:40:25 -0800 Subject: [PATCH 21/31] i386: Add _CET_ENDBR to indirect jump targets * mpi/i386/mpih-add1.S (_gcry_mpih_add_n): Save and restore %ebx if IBT is enabed. Add _CET_ENDBR to indirect jump targets and adjust jump destination for _CET_ENDBR. 
* mpi/i386/mpih-sub1.S (_gcry_mpih_sub_n): Likewise. -- i386 mpih-add1.S and mpih-sub1.S use a trick to implment jump tables with LEA. We can't use conditional branches nor normal jump tables since jump table entries use EFLAGS set by jump table index. This patch adds _CET_ENDBR to indirect jump targets and adjust destination for _CET_ENDBR. Signed-off-by: H.J. Lu --- mpi/i386/mpih-add1.S | 37 +++++++++++++++++++++++++++++++++++++ mpi/i386/mpih-sub1.S | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/mpi/i386/mpih-add1.S b/mpi/i386/mpih-add1.S index 32091f34..2f1ae931 100644 --- a/mpi/i386/mpih-add1.S +++ b/mpi/i386/mpih-add1.S @@ -55,6 +55,11 @@ C_SYMBOL_NAME(_gcry_mpih_add_n:) movl 20(%esp),%edx /* s2_ptr */ movl 24(%esp),%ecx /* size */ +#if defined __CET__ && (__CET__ & 1) != 0 + pushl %ebx + CFI_PUSH(%ebx) +#endif + movl %ecx,%eax shrl $3,%ecx /* compute count for unrolled loop */ negl %eax @@ -66,6 +71,9 @@ C_SYMBOL_NAME(_gcry_mpih_add_n:) subl %eax,%esi /* ... by a constant when we ... */ subl %eax,%edx /* ... enter the loop */ shrl $2,%eax /* restore previous value */ +#if defined __CET__ && (__CET__ & 1) != 0 + leal -4(,%eax,4),%ebx /* Count for 4-byte endbr32 */ +#endif #ifdef PIC /* Calculate start address in loop for PIC. Due to limitations in some assemblers, Loop-L0-3 cannot be put into the leal */ @@ -77,30 +85,54 @@ L0: leal (%eax,%eax,8),%eax #else /* Calculate start address in loop for non-PIC. 
*/ leal (Loop - 3)(%eax,%eax,8),%eax +#endif +#if defined __CET__ && (__CET__ & 1) != 0 + addl %ebx,%eax /* Adjust for endbr32 */ #endif jmp *%eax /* jump into loop */ ALIGN (3) Loop: movl (%esi),%eax adcl (%edx),%eax movl %eax,(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 4(%esi),%eax adcl 4(%edx),%eax movl %eax,4(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 8(%esi),%eax adcl 8(%edx),%eax movl %eax,8(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 12(%esi),%eax adcl 12(%edx),%eax movl %eax,12(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 16(%esi),%eax adcl 16(%edx),%eax movl %eax,16(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 20(%esi),%eax adcl 20(%edx),%eax movl %eax,20(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 24(%esi),%eax adcl 24(%edx),%eax movl %eax,24(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 28(%esi),%eax adcl 28(%edx),%eax movl %eax,28(%edi) @@ -113,6 +145,11 @@ Loop: movl (%esi),%eax sbbl %eax,%eax negl %eax +#if defined __CET__ && (__CET__ & 1) != 0 + popl %ebx + CFI_POP(%ebx) +#endif + popl %esi CFI_POP(%esi) popl %edi diff --git a/mpi/i386/mpih-sub1.S b/mpi/i386/mpih-sub1.S index 501c4a9f..01e977e5 100644 --- a/mpi/i386/mpih-sub1.S +++ b/mpi/i386/mpih-sub1.S @@ -56,6 +56,11 @@ C_SYMBOL_NAME(_gcry_mpih_sub_n:) movl 20(%esp),%edx /* s2_ptr */ movl 24(%esp),%ecx /* size */ +#if defined __CET__ && (__CET__ & 1) != 0 + pushl %ebx + CFI_PUSH(%ebx) +#endif + movl %ecx,%eax shrl $3,%ecx /* compute count for unrolled loop */ negl %eax @@ -67,6 +72,9 @@ C_SYMBOL_NAME(_gcry_mpih_sub_n:) subl %eax,%esi /* ... by a constant when we ... */ subl %eax,%edx /* ... enter the loop */ shrl $2,%eax /* restore previous value */ +#if defined __CET__ && (__CET__ & 1) != 0 + leal -4(,%eax,4),%ebx /* Count for 4-byte endbr32 */ +#endif #ifdef PIC /* Calculate start address in loop for PIC. 
Due to limitations in some assemblers, Loop-L0-3 cannot be put into the leal */ @@ -78,30 +86,54 @@ L0: leal (%eax,%eax,8),%eax #else /* Calculate start address in loop for non-PIC. */ leal (Loop - 3)(%eax,%eax,8),%eax +#endif +#if defined __CET__ && (__CET__ & 1) != 0 + addl %ebx,%eax /* Adjust for endbr32 */ #endif jmp *%eax /* jump into loop */ ALIGN (3) Loop: movl (%esi),%eax sbbl (%edx),%eax movl %eax,(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 4(%esi),%eax sbbl 4(%edx),%eax movl %eax,4(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 8(%esi),%eax sbbl 8(%edx),%eax movl %eax,8(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 12(%esi),%eax sbbl 12(%edx),%eax movl %eax,12(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 16(%esi),%eax sbbl 16(%edx),%eax movl %eax,16(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 20(%esi),%eax sbbl 20(%edx),%eax movl %eax,20(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 24(%esi),%eax sbbl 24(%edx),%eax movl %eax,24(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 28(%esi),%eax sbbl 28(%edx),%eax movl %eax,28(%edi) @@ -114,6 +146,11 @@ Loop: movl (%esi),%eax sbbl %eax,%eax negl %eax +#if defined __CET__ && (__CET__ & 1) != 0 + popl %ebx + CFI_POP(%ebx) +#endif + popl %esi CFI_POP(%esi) popl %edi From 5f098f7e6ceb899ac27a0a30ee036de5f1be4e3d Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Wed, 22 Jan 2020 21:31:49 +0200 Subject: [PATCH 22/31] mpi/i386: fix DWARF CFI for _gcry_mpih_sub_n and _gcry_mpih_add_n * mpi/i386/mpih-add1.S (_gcry_mpih_add_n) [PIC]: Adjust CFI CFA offset when making call and restoring stack pointer. * mpi/i386/mpih-sub1.S (_gcry_mpih_sub_n) [PIC]: Ditto. 
-- Signed-off-by: Jussi Kivilinna --- mpi/i386/mpih-add1.S | 2 ++ mpi/i386/mpih-sub1.S | 2 ++ 2 files changed, 4 insertions(+) diff --git a/mpi/i386/mpih-add1.S b/mpi/i386/mpih-add1.S index 2f1ae931..de78a0cb 100644 --- a/mpi/i386/mpih-add1.S +++ b/mpi/i386/mpih-add1.S @@ -78,10 +78,12 @@ C_SYMBOL_NAME(_gcry_mpih_add_n:) /* Calculate start address in loop for PIC. Due to limitations in some assemblers, Loop-L0-3 cannot be put into the leal */ call L0 + CFI_ADJUST_CFA_OFFSET(4) L0: leal (%eax,%eax,8),%eax addl (%esp),%eax addl $(Loop-L0-3),%eax addl $4,%esp + CFI_ADJUST_CFA_OFFSET(-4) #else /* Calculate start address in loop for non-PIC. */ leal (Loop - 3)(%eax,%eax,8),%eax diff --git a/mpi/i386/mpih-sub1.S b/mpi/i386/mpih-sub1.S index 01e977e5..2bdc1438 100644 --- a/mpi/i386/mpih-sub1.S +++ b/mpi/i386/mpih-sub1.S @@ -79,10 +79,12 @@ C_SYMBOL_NAME(_gcry_mpih_sub_n:) /* Calculate start address in loop for PIC. Due to limitations in some assemblers, Loop-L0-3 cannot be put into the leal */ call L0 + CFI_ADJUST_CFA_OFFSET(4) L0: leal (%eax,%eax,8),%eax addl (%esp),%eax addl $(Loop-L0-3),%eax addl $4,%esp + CFI_ADJUST_CFA_OFFSET(-4) #else /* Calculate start address in loop for non-PIC. */ leal (Loop - 3)(%eax,%eax,8),%eax From 8b31091da092e22dba78b2402c2f436bbffc1c73 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Wed, 22 Jan 2020 22:31:12 +0200 Subject: [PATCH 23/31] sexp: fix cast from 'int' pointer to 'size_t' pointer * src/sexp.c (do_vsexp_sscan): Change 'datalen' from 'int' to 'size_t'; Remove &datalen pointer cast to 'size_t *' type. 
-- Signed-off-by: Jussi Kivilinna --- src/sexp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sexp.c b/src/sexp.c index 57d77d29..17341ba5 100644 --- a/src/sexp.c +++ b/src/sexp.c @@ -1114,7 +1114,7 @@ do_vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff, int hexcount = 0; int b64count = 0; int quoted_esc = 0; - int datalen = 0; + size_t datalen = 0; size_t dummy_erroff; struct make_space_ctx c; int arg_counter = 0; @@ -1354,7 +1354,7 @@ do_vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff, goto leave; } err = gpgrt_b64dec_proc (b64state, b64buf, b64count, - (size_t *)&datalen); + &datalen); if (err && gpg_err_code (err) != GPG_ERR_EOF) { xfree (b64state); From e0898d0628789414da23e0526c87df1885c8b3ae Mon Sep 17 00:00:00 2001 From: NIIBE Yutaka Date: Thu, 23 Jan 2020 10:29:10 +0900 Subject: [PATCH 24/31] random: Fix include of config.h. * random/random-drbg.c: Include config.h earlier. -- GnuPG-bug-id: 4818 Reported-by: Bruno Haible Signed-off-by: NIIBE Yutaka --- random/random-drbg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/random/random-drbg.c b/random/random-drbg.c index e0b4230e..6124f5fb 100644 --- a/random/random-drbg.c +++ b/random/random-drbg.c @@ -146,12 +146,12 @@ * gcry_randomize(outbuf, OUTLEN, GCRY_STRONG_RANDOM); */ +#include + #include #include #include -#include - #include "g10lib.h" #include "random.h" #include "rand-internal.h" From dee702e80551c8bdaf842bce2e5442a649948dd1 Mon Sep 17 00:00:00 2001 From: Paul Wolneykien Date: Mon, 30 Sep 2019 16:42:02 +0300 Subject: [PATCH 25/31] GOST VKO support in ECDH: multiply by an optional UKM value Signed-off-by: Paul Wolneykien --- cipher/ecc.c | 71 +++++++++++++++++++++++++++++++++++++++++++- cipher/pubkey-util.c | 23 +++++++++++++- src/cipher.h | 2 +- 3 files changed, 93 insertions(+), 3 deletions(-) diff --git a/cipher/ecc.c b/cipher/ecc.c index 49c2c0f6..9326f648 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -879,6 +879,7 @@ ecc_encrypt_raw 
(gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) gcry_mpi_t mpi_s = NULL; gcry_mpi_t mpi_e = NULL; gcry_mpi_t data = NULL; + gcry_mpi_t salt = NULL; mpi_ec_t ec = NULL; int flags = 0; int no_error_on_infinity; @@ -922,6 +923,29 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) mpi_clear_bit (data, i); mpi_set_highbit (data, ec->nbits - 1); } + + /* For GOST extract the UKM value. If its length is unspecified + take 64 bits as default. */ + if ((flags & PUBKEY_FLAG_GOST)) + { + unsigned int ukm_blen = ctx.saltlen ? ctx.saltlen : 64; + if (_gcry_mpi_get_nbits (data) < ukm_blen) + { + rc = GPG_ERR_TOO_SHORT; + goto leave; + } + salt = _gcry_mpi_copy (data); + if (!salt) + { + rc = gpg_error_from_syserror (); + goto leave; + } + _gcry_mpi_clear_highbit (salt, ukm_blen); + if (DBG_CIPHER) + log_printmpi ("UKM: ", salt); + _gcry_mpi_rshift (data, data, ukm_blen); + } + if (DBG_CIPHER) log_mpidump ("ecc_encrypt data", data); @@ -957,6 +981,10 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) /* R = kQ <=> R = kdG */ _gcry_mpi_ec_mul_point (&R, data, ec->Q, ec); + /* Multiply the resulting point by a salt value if any. 
*/ + if (salt) + _gcry_mpi_ec_mul_point (&R, salt, &R, ec); + if (_gcry_mpi_ec_get_affine (x, y, &R, ec)) { /* @@ -1019,7 +1047,14 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) } if (!rc) - rc = sexp_build (r_ciph, NULL, "(enc-val(ecdh(s%m)(e%m)))", mpi_s, mpi_e); + { + if (DBG_CIPHER) + { + log_printmpi ("ecc_encrypt res", mpi_s); + log_printmpi ("ecc_encrypt public key e", mpi_e); + } + rc = sexp_build (r_ciph, NULL, "(enc-val(ecdh(s%m)(e%m)))", mpi_s, mpi_e); + } leave: _gcry_mpi_release (data); @@ -1027,6 +1062,7 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) _gcry_mpi_release (mpi_e); _gcry_mpi_ec_free (ec); _gcry_pk_util_free_encoding_ctx (&ctx); + _gcry_mpi_release (salt); if (DBG_CIPHER) log_debug ("ecc_encrypt => %s\n", gpg_strerror (rc)); return rc; @@ -1052,6 +1088,7 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) mpi_point_struct kG; mpi_point_struct R; gcry_mpi_t r = NULL; + gcry_mpi_t salt = NULL; int flags = 0; int enable_specific_point_validation; @@ -1100,6 +1137,32 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) else enable_specific_point_validation = 0; + /* For GOST extract the UKM value. */ + if ((flags & PUBKEY_FLAG_GOST) || 0 == strncmp ("GOST", sk.E.name, 4)) + { + // FIXME: Expect an uncompressed point format 0x04... + int key_len = 2*nbits/8 + 1; + int data_len = (_gcry_mpi_get_nbits (data_e)+7)/8; + int ukm_blen = (data_len - key_len) * 8; + if (ukm_blen < 64) + { + rc = GPG_ERR_TOO_SHORT; + goto leave; + } + salt = _gcry_mpi_copy (data_e); + if (!salt) + { + rc = gpg_error_from_syserror (); + goto leave; + } + _gcry_mpi_clear_highbit (salt, ukm_blen); + if (DBG_CIPHER) + log_printmpi ("UKM: ", salt); + _gcry_mpi_rshift (data_e, data_e, ukm_blen); + if (DBG_CIPHER) + log_printmpi ("ecc_decrypt d_e", data_e); + } + /* * Compute the plaintext. 
*/ @@ -1134,9 +1197,14 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) goto leave; } + /* R = dkG */ _gcry_mpi_ec_mul_point (&R, ec->d, &kG, ec); + /* Multiply the resulting point by a salt value if any. */ + if (salt) + _gcry_mpi_ec_mul_point (&R, salt, &R, ec); + /* The following is false: assert( mpi_cmp_ui( R.x, 1 )==0 );, so: */ { gcry_mpi_t x, y; @@ -1212,6 +1280,7 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) sexp_release (l1); _gcry_mpi_ec_free (ec); _gcry_pk_util_free_encoding_ctx (&ctx); + _gcry_mpi_release (salt); if (DBG_CIPHER) log_debug ("ecc_decrypt => %s\n", gpg_strerror (rc)); return rc; diff --git a/cipher/pubkey-util.c b/cipher/pubkey-util.c index c52185de..621000ba 100644 --- a/cipher/pubkey-util.c +++ b/cipher/pubkey-util.c @@ -657,7 +657,7 @@ _gcry_pk_util_free_encoding_ctx (struct pk_encoding_ctx *ctx) () or (data - [(flags [raw, direct, pkcs1, oaep, pss, no-blinding, rfc6979, eddsa])] + [(flags [raw, direct, pkcs1, oaep, pss, no-blinding, rfc6979, eddsa, gost])] [(hash )] [(value )] [(hash-algo )] @@ -674,6 +674,7 @@ _gcry_pk_util_free_encoding_ctx (struct pk_encoding_ctx *ctx) LABEL is specific to OAEP. SALT-LENGTH is for PSS it is limited to 16384 bytes. + For GOST a SALT-LENGTH means the length of UKM in bits. RANDOM-OVERRIDE is used to replace random nonces for regression testing. */ @@ -823,6 +824,26 @@ _gcry_pk_util_data_to_mpi (gcry_sexp_t input, gcry_mpi_t *ret_mpi, *ret_mpi = sexp_nth_mpi (lvalue, 1, GCRYMPI_FMT_USG); if (!*ret_mpi) rc = GPG_ERR_INV_OBJ; + + if (parsed_flags & PUBKEY_FLAG_GOST) + { + gcry_sexp_t list; + /* Get SALT-LENGTH (UKM length). 
*/ + list = sexp_find_token (ldata, "salt-length", 0); + if (list) + { + s = sexp_nth_data (list, 1, &n); + if (!s) + { + rc = GPG_ERR_NO_OBJ; + goto leave; + } + ctx->saltlen = (unsigned int)strtoul (s, NULL, 10); + sexp_release (list); + } + else + ctx->saltlen = 0; + } } else if (ctx->encoding == PUBKEY_ENC_PKCS1 && lvalue && ctx->op == PUBKEY_OP_ENCRYPT) diff --git a/src/cipher.h b/src/cipher.h index 1fe50890..55eedb47 100644 --- a/src/cipher.h +++ b/src/cipher.h @@ -77,7 +77,7 @@ struct pk_encoding_ctx unsigned char *label; size_t labellen; - /* for PSS */ + /* for PSS or GOST (UKM length in bits)*/ size_t saltlen; int (* verify_cmp) (void *opaque, gcry_mpi_t tmp); From f33218551430d4fb038332d5ac21cf4fa23110d5 Mon Sep 17 00:00:00 2001 From: Paul Wolneykien Date: Wed, 13 Nov 2019 15:25:04 +0300 Subject: [PATCH 26/31] Updated the top encrypt/decrypt comment with info about salt values Signed-off-by: Paul Wolneykien --- cipher/ecc.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/cipher/ecc.c b/cipher/ecc.c index 9326f648..2dfb4063 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -855,20 +855,30 @@ ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms) * dG - public long-term key * k - ephemeral scalar * kG - ephemeral public key - * dkG - shared secret + * S - optional salt value currently used with GOST + * kSdG - shared secret * * ecc_encrypt_raw description: - * input: - * data[0] : private scalar (k) + * input: An S-expression with: + * a private scalar (k) + * an optional salt value (S) * output: A new S-expression with the parameters: - * s : shared point (kdG) + * s : shared point (kSdG) * e : generated ephemeral public key (kG) * + * For information about the format of the input S-expression + * see _gcry_pk_util_data_to_mpi(). 
+ * * ecc_decrypt_raw description: * input: - * data[0] : a point kG (ephemeral public key) + * data[0] : a point kG (ephemeral public key) with an optional + * salt value (S) * output: - * result[0] : shared point (kdG) + * result[0] : shared point (kSdG) + * + * The input format of the salt value to be used with ecc_decrypt_raw() + * depends on the underlying public key algorithm. For GOST keys, the salt + * value should be passed in the low bits of the input MPI value. */ static gcry_err_code_t ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) @@ -1070,9 +1080,10 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) /* input: - * data[0] : a point kG (ephemeral public key) + * data[0] : a point kG (ephemeral public key) with an optional + * salt value (S) * output: - * resaddr[0] : shared point kdG + * resaddr[0] : shared point kSdG * * see ecc_encrypt_raw for details. */ From 14b085b974a5fd25be5b804ac7218d58ca3aff82 Mon Sep 17 00:00:00 2001 From: Paul Wolneykien Date: Wed, 13 Nov 2019 15:32:44 +0300 Subject: [PATCH 27/31] Fixed the comment format Signed-off-by: Paul Wolneykien --- cipher/ecc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cipher/ecc.c b/cipher/ecc.c index 2dfb4063..81b2770f 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -1151,7 +1151,7 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) /* For GOST extract the UKM value. */ if ((flags & PUBKEY_FLAG_GOST) || 0 == strncmp ("GOST", sk.E.name, 4)) { - // FIXME: Expect an uncompressed point format 0x04... + /* FIXME: Expect an uncompressed point format 0x04... 
*/ int key_len = 2*nbits/8 + 1; int data_len = (_gcry_mpi_get_nbits (data_e)+7)/8; int ukm_blen = (data_len - key_len) * 8; From 77ab7d3b86053c6877179fe55858009c46007931 Mon Sep 17 00:00:00 2001 From: Paul Wolneykien Date: Wed, 13 Nov 2019 15:33:12 +0300 Subject: [PATCH 28/31] Removed extra newlines Signed-off-by: Paul Wolneykien --- cipher/ecc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/cipher/ecc.c b/cipher/ecc.c index 81b2770f..c190f60e 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -1208,7 +1208,6 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) goto leave; } - /* R = dkG */ _gcry_mpi_ec_mul_point (&R, ec->d, &kG, ec); From 3beb55fba7b4ed2c618e04af227430d90e1b5464 Mon Sep 17 00:00:00 2001 From: Paul Wolneykien Date: Wed, 11 Dec 2019 14:01:11 +0300 Subject: [PATCH 29/31] Additional check that salt value isn't 0 Signed-off-by: Paul Wolneykien --- cipher/ecc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cipher/ecc.c b/cipher/ecc.c index c190f60e..ebe2e0e6 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -992,7 +992,7 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) _gcry_mpi_ec_mul_point (&R, data, ec->Q, ec); /* Multiply the resulting point by a salt value if any. */ - if (salt) + if (salt && gcry_mpi_cmp_ui (salt, 0)) _gcry_mpi_ec_mul_point (&R, salt, &R, ec); if (_gcry_mpi_ec_get_affine (x, y, &R, ec)) @@ -1212,7 +1212,7 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) _gcry_mpi_ec_mul_point (&R, ec->d, &kG, ec); /* Multiply the resulting point by a salt value if any. 
*/ - if (salt) + if (salt && gcry_mpi_cmp_ui (salt, 0)) _gcry_mpi_ec_mul_point (&R, salt, &R, ec); /* The following is false: assert( mpi_cmp_ui( R.x, 1 )==0 );, so: */ From c936d711a765ae8b953a541565dfa6a3c19aa035 Mon Sep 17 00:00:00 2001 From: Paul Wolneykien Date: Wed, 11 Dec 2019 14:14:41 +0300 Subject: [PATCH 30/31] Fix: Detect GOST mode by the flags only in ecc_decrypt_raw() Signed-off-by: Paul Wolneykien --- cipher/ecc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cipher/ecc.c b/cipher/ecc.c index ebe2e0e6..0bef303b 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -1149,7 +1149,7 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) enable_specific_point_validation = 0; /* For GOST extract the UKM value. */ - if ((flags & PUBKEY_FLAG_GOST) || 0 == strncmp ("GOST", sk.E.name, 4)) + if ((flags & PUBKEY_FLAG_GOST)) { /* FIXME: Expect an uncompressed point format 0x04... */ int key_len = 2*nbits/8 + 1; From 6a318203dfe4f535614bd296c2daeecab3877b60 Mon Sep 17 00:00:00 2001 From: Paul Wolneykien Date: Wed, 11 Dec 2019 14:16:38 +0300 Subject: [PATCH 31/31] Fix: The note about the uncompressed point is not a real FIXME Signed-off-by: Paul Wolneykien --- cipher/ecc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cipher/ecc.c b/cipher/ecc.c index 0bef303b..f2e8bf00 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -1151,7 +1151,7 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) /* For GOST extract the UKM value. */ if ((flags & PUBKEY_FLAG_GOST)) { - /* FIXME: Expect an uncompressed point format 0x04... */ + /* Expect the uncompressed point format 0x04... */ int key_len = 2*nbits/8 + 1; int data_len = (_gcry_mpi_get_nbits (data_e)+7)/8; int ukm_blen = (data_len - key_len) * 8;