"""Generate the graviola Rust wrappers for the ed25519-related s2n-bignum routines.

Each job reads one s2n-bignum assembly file and writes one Rust source file
containing the equivalent inline assembly, for both x86_64 and aarch64.
Run from the `admin/parse-asm` directory: all paths are relative to it.
"""
from parse import parse_file
from driver import (
    Architecture_aarch64,
    Architecture_amd64,
    RustDriver,
)

_ASM_ROOT = "../../thirdparty/s2n-bignum"
_RUST_ROOT = "../../graviola/src/low"

# target key -> (driver architecture, s2n-bignum directory, graviola directory)
_TARGETS = {
    "x86_64": (Architecture_amd64, "x86", "x86_64"),
    "aarch64": (Architecture_aarch64, "arm", "aarch64"),
}

# Rust declarations shared by the x86_64 and aarch64 variants of each routine
_DECODE_DECL = "fn edwards25519_decode(z: &mut [u64; 8], c: &[u8; 32]) -> bool"
_SCALARMULBASE_DECL = (
    "fn edwards25519_scalarmulbase(res: &mut [u64; 8], scalar: &[u64; 4])"
)
_SCALARMULDOUBLE_DECL = "fn edwards25519_scalarmuldouble(res: &mut [u64; 8], scalar: &[u64; 4], point: &[u64; 8], bscalar: &[u64; 4])"
_MADD_DECL = "fn bignum_madd_n25519(z: &mut [u64; 4], x: &[u64; 4], y: &[u64; 4], c: &[u64; 4])"
_MOD_DECL = "fn bignum_mod_n25519(z: &mut [u64; 4], x: &[u64])"
_NEG_DECL = "fn bignum_neg_p25519(z: &mut [u64; 4], x: &[u64; 4])"


def _inout(*bindings):
    """Return a fresh parameter map binding each expression as an `inout` operand."""
    return [("inout", binding) for binding in bindings]


# One entry per generated file, in generation order:
#   (target, assembly function/file stem, Rust file stem, const symbols,
#    keyword arguments for RustDriver.emit_rust_function)
_JOBS = [
    # ---- x86_64 ----
    (
        "x86_64",
        "edwards25519_decode",
        "edwards25519_decode",
        (),
        dict(
            parameter_map=_inout("z.as_mut_ptr() => _", "c.as_ptr() => _"),
            return_value=("u64", "ret", "ret == 0"),
            return_map=("out", "ret"),
            hoist=["proc", "edwards25519_decode_loop", "ret"],
            rust_decl=_DECODE_DECL,
        ),
    ),
    (
        "x86_64",
        "edwards25519_scalarmulbase",
        "edwards25519_scalarmulbase",
        (
            "edwards25519_scalarmulbase_0g",
            "edwards25519_scalarmulbase_251g",
            "edwards25519_scalarmulbase_gtable",
        ),
        dict(
            parameter_map=_inout("res.as_mut_ptr() => _", "scalar.as_ptr() => _"),
            rust_decl=_SCALARMULBASE_DECL,
        ),
    ),
    (
        "x86_64",
        "edwards25519_scalarmuldouble",
        "edwards25519_scalarmuldouble",
        ("edwards25519_scalarmuldouble_table",),
        dict(
            parameter_map=_inout(
                "res.as_mut_ptr() => _",
                "scalar.as_ptr() => _",
                "point.as_ptr() => _",
                "bscalar.as_ptr() => _",
            ),
            hoist=["proc", "edwards25519_scalarmuldouble_pepadd", "ret"],
            rust_decl=_SCALARMULDOUBLE_DECL,
        ),
    ),
    (
        "x86_64",
        "bignum_madd_n25519",
        "bignum_madd_n25519",
        (),
        dict(
            parameter_map=_inout(
                "z.as_mut_ptr() => _",
                "x.as_ptr() => _",
                "y.as_ptr() => _",
                "c.as_ptr() => _",
            ),
            rust_decl=_MADD_DECL,
        ),
    ),
    (
        "x86_64",
        "bignum_mod_n25519",
        "bignum_mod_n25519",
        (),
        dict(
            parameter_map=_inout(
                "z.as_mut_ptr() => _", "x.len() => _", "x.as_ptr() => _"
            ),
            hoist=["linear", "bignum_mod_n25519_shortinput", "jmp"],
            rust_decl=_MOD_DECL,
        ),
    ),
    (
        "x86_64",
        "bignum_neg_p25519",
        "bignum_neg_p25519",
        (),
        dict(
            parameter_map=_inout("z.as_mut_ptr() => _", "x.as_ptr() => _"),
            rust_decl=_NEG_DECL,
        ),
    ),
    # ---- aarch64 (the `_alt` assembly variants keep the plain Rust names) ----
    (
        "aarch64",
        "edwards25519_decode_alt",
        "edwards25519_decode",
        (),
        dict(
            # the first operand is bound to `ret` here; no separate return_map
            parameter_map=_inout("z.as_mut_ptr() => ret", "c.as_ptr() => _"),
            return_value=("u64", "ret", "ret == 0"),
            hoist=["proc", "edwards25519_decode_alt_loop", "ret"],
            rust_decl=_DECODE_DECL,
        ),
    ),
    (
        "aarch64",
        "edwards25519_scalarmulbase_alt",
        "edwards25519_scalarmulbase",
        (
            "edwards25519_scalarmulbase_alt_edwards25519_0g",
            "edwards25519_scalarmulbase_alt_edwards25519_251g",
            "edwards25519_scalarmulbase_alt_edwards25519_gtable",
        ),
        dict(
            parameter_map=_inout("res.as_mut_ptr() => _", "scalar.as_ptr() => _"),
            rust_decl=_SCALARMULBASE_DECL,
        ),
    ),
    (
        "aarch64",
        "edwards25519_scalarmuldouble_alt",
        "edwards25519_scalarmuldouble",
        ("edwards25519_scalarmuldouble_alt_table",),
        dict(
            parameter_map=_inout(
                "res.as_mut_ptr() => _",
                "scalar.as_ptr() => _",
                "point.as_ptr() => _",
                "bscalar.as_ptr() => _",
            ),
            hoist=["proc", "edwards25519_scalarmuldouble_alt_pepadd", "ret"],
            rust_decl=_SCALARMULDOUBLE_DECL,
        ),
    ),
    (
        "aarch64",
        "bignum_madd_n25519_alt",
        "bignum_madd_n25519",
        (),
        dict(
            parameter_map=_inout(
                "z.as_mut_ptr() => _",
                "x.as_ptr() => _",
                "y.as_ptr() => _",
                "c.as_ptr() => _",
            ),
            rust_decl=_MADD_DECL,
        ),
    ),
    (
        "aarch64",
        "bignum_mod_n25519",
        "bignum_mod_n25519",
        (),
        dict(
            parameter_map=_inout(
                "z.as_mut_ptr() => _", "x.len() => _", "x.as_ptr() => _"
            ),
            hoist=["linear", "bignum_mod_n25519_short", "b"],
            rust_decl=_MOD_DECL,
        ),
    ),
    (
        "aarch64",
        "bignum_neg_p25519",
        "bignum_neg_p25519",
        (),
        dict(
            parameter_map=_inout("z.as_mut_ptr() => _", "x.as_ptr() => _"),
            rust_decl=_NEG_DECL,
        ),
    ),
]


def _generate(target, asm_name, rust_stem, const_symbols, emit_args):
    """Translate one assembly file into one Rust file.

    `asm_name` is both the stem of the `.S` file and the assembly function
    name handed to the driver; `rust_stem` names the output `.rs` file.
    """
    architecture, asm_dir, rust_dir = _TARGETS[target]
    asm_path = f"{_ASM_ROOT}/{asm_dir}/curve25519/{asm_name}.S"
    rust_path = f"{_RUST_ROOT}/{rust_dir}/{rust_stem}.rs"

    # `asm`/`rust` rather than `input`/`output`: avoids shadowing the builtin
    with open(asm_path) as asm, open(rust_path, "w") as rust:
        d = RustDriver(rust, architecture)
        # const symbols must be registered before the function is declared,
        # matching the order used for every routine
        for symbol in const_symbols:
            d.add_const_symbol(symbol)
        d.emit_rust_function(asm_name, **emit_args)
        parse_file(asm, d)


def main():
    """Run every generation job in order."""
    for job in _JOBS:
        _generate(*job)


if __name__ == "__main__":
    main()
parse_file(input, d) diff --git a/admin/parse-asm/driver.py b/admin/parse-asm/driver.py index 386b341fa..888b7e29b 100644 --- a/admin/parse-asm/driver.py +++ b/admin/parse-asm/driver.py @@ -1,8 +1,9 @@ -import string +import copy from functools import reduce -import subprocess from io import StringIO -import copy +import re +import string +import subprocess from parse import Type, register_from_token, tokenise, is_comment @@ -218,7 +219,7 @@ def replay(self, other): for ty, args in self.events: other(ty, *args) - +# A pass to collect all defined labels class LabelCollector(QuietDispatcher): def __init__(self): self.labels = set() @@ -231,6 +232,60 @@ def on_label(self, contexts, label): def get_labels(self): return set(self.labels) +RUST_FUNCTION_DECL = re.compile(r"^fn (?P<name>[a-zA-Z0-9_]+)\(") + +# A pass to collect (1) all macro definitions that reference labels, and (2) the +# blocks in which they are expanded. We use this later to fixup the generated +# Rust macros to use the correct local label references. 
class MacroWithLabelRefCollector(QuietDispatcher):
    """Record which macros mention labels and in which blocks they are expanded.

    After a replay, `macro_defs` maps each macro name to the set of label
    names found in its definition, and `macro_exps` maps each such macro to
    the set of blocks (Rust function names or label names) in which it was
    expanded. `expected_labels` must be assigned before the replay.
    """

    def __init__(self):
        # macro name -> set of label names referenced by its definition
        self.macro_defs = {}
        # macro name -> set of block names in which the macro is expanded
        self.macro_exps = {}
        self.labels = {}
        # every label defined in the input; assigned externally before replay
        self.expected_labels = set()
        # assembly function name -> Rust function name (None when discarded)
        self.expected_functions = {}
        # name of the function or label most recently entered
        self.current_block = None

    def discard_rust_function(self, function):
        """Mark `function` as discarded: its body does not count as a block."""
        self.expected_functions[function] = None

    def emit_rust_function(self, name, rust_decl):
        """Remember the Rust name that assembly function `name` is emitted under.

        Raises ValueError if `rust_decl` is given but does not start with
        `fn <identifier>(`.
        """
        if rust_decl is None:
            # NOTE(review): assumes the function is emitted under its assembly
            # name when no declaration is supplied -- confirm against the
            # formatter. This mirrors the fallback used in `on_function`.
            self.expected_functions[name] = name
            return

        decl = RUST_FUNCTION_DECL.match(rust_decl)
        if decl is None:
            # previously an opaque AttributeError on `None.group`
            raise ValueError(
                f"cannot find function name in rust_decl for {name}: {rust_decl!r}"
            )
        self.expected_functions[name] = decl.group("name")

    def need_macro_fixup_pass(self):
        """Return True if any macro definition contains a label reference."""
        return bool(self.macro_defs)

    def on_define(self, name, *value):
        # The macro name without parameters
        name = tokenise(name)[0]

        if name in self.macro_defs:
            print("duplicate macro", name)

        referenced = {
            token
            for part in value
            for token in tokenise(part)
            if token in self.expected_labels
        }
        # only macros that actually reference a label get an entry
        if referenced:
            self.macro_defs.setdefault(name, set()).update(referenced)

    def on_function(self, contexts, name):
        # unregistered functions keep their assembly name; discarded ones map
        # to None, so macros expanded in them are reported by `on_macro`
        self.current_block = self.expected_functions.get(name, name)

    def on_label(self, contexts, label):
        if label not in self.expected_labels:
            print(f"label ({label}) not in expected_labels")
        self.current_block = label

    def on_macro(self, name, operands):
        # NOTE: doesn't handle macros used inside other macros
        if name not in self.macro_defs:
            return
        if self.current_block is None:
            print(f"macro ({name}) expanded outside block")
            return
        self.macro_exps.setdefault(name, set()).add(self.current_block)
def emit_rust_function( allow_inline=True, hoist=None, ): + self.macro_pass.emit_rust_function(name, rust_decl) self.formatter.emit_rust_function( name, parameter_map, @@ -464,9 +522,35 @@ def finish(self): # because otherwise it is hard to know which tokens # refer to a later label self.collector.replay(self.label_pass) - self.formatter.expected_labels = self.label_pass.get_labels() - self.collector.replay(self.formatter) + expected_labels = self.label_pass.get_labels() + self.macro_pass.expected_labels = expected_labels + self.formatter.expected_labels = expected_labels + + # do a pass looking for macro definitions that reference labels, which + # we need to fixup later + self.collector.replay(self.macro_pass) + + if not self.macro_pass.need_macro_fixup_pass(): + self.collector.replay(self.formatter) + output = self.formatter.output + else: + output = self.formatter.output + self.formatter.output = StringIO() + self.collector.replay(self.formatter) + + # fixup macro definitions in the generated Rust code so the local + # label references are valid + macro_fixup_pass( + output, + self.formatter.output, + self.macro_pass.macro_defs, + self.macro_pass.macro_exps, + ) + output.close() + filename = output.name + subprocess.check_call(["rustfmt", filename]) + print("GENERATED", filename) class RustFormatter(Dispatcher): def __init__(self, output, architecture): @@ -544,7 +628,9 @@ def find_label(self, label, defn=False): """ func = self.function_state if func is None: - return 0, True + # use `-1` to indicate that this label target needs to be filled + # in later in the `macro_fixup_pass` + return -1, True if defn: func.labels_defined.add(label) @@ -1001,12 +1087,78 @@ def finish_function(self): def on_eof(self): self.finish_constant_array() - filename = self.output.name - self.output.close() +# A pass to fixup macro definitions that reference labels. 
def macro_fixup_pass(rust_output, rust_input, macro_defs, macro_exps):
    """Resolve placeholder label references inside generated Rust macros.

    Macros defined outside any function are emitted with references of the
    form `Label!("name", -1, Before)`. This pass works on the generated Rust
    text (rather than the assembly) so that it sees functions and label
    blocks in their final, post-hoisting order, and rewrites each placeholder
    to `Label!("name", <id>, Before|After)`.

    rust_output -- writable text stream receiving the fixed-up source
    rust_input  -- object whose `getvalue()` returns the generated source
    macro_defs  -- macro name -> set of label names its definition references
    macro_exps  -- macro name -> set of block names (functions or labels) in
                   which the macro is expanded

    A label that lies inside the span of a macro's expansions cannot be
    fixed with a single rewrite; a diagnostic is printed and the placeholder
    is left in place.

    Raises KeyError if a block or label named in `macro_exps`/`macro_defs`
    has no definition in the generated source.
    """
    # Matchers for the generated Rust; each construct starts at column 0
    function_re = re.compile(r"^pub\(crate\) fn (?P<name>[a-zA-Z0-9_]+)\(")
    macro_def_re = re.compile(r"^macro_rules! (?P<name>[a-zA-Z0-9_]+) {$")
    label_def_re = re.compile(
        r"^Q!\(Label!\(\"(?P<name>[a-zA-Z0-9_]+)\", (?P<id>\d+)\) \":\"\),$"
    )
    label_ref_re = re.compile(r"Label!\(\"(?P<name>[a-zA-Z0-9_]+)\", -1, Before\)")

    text = rust_input.getvalue()
    lines = text.splitlines()

    # defs:   function/label name -> (line number, label id or None)
    #   ... pub(crate) fn name(
    #   ... Label!("name", id) ":"
    # fixups: (macro name, label name) -> line numbers holding a placeholder
    defs = {}
    fixups = {}
    macro_context = None

    for ln, line in enumerate(lines):
        m = function_re.match(line)
        if m:
            defs[m.group("name")] = (ln, None)
            macro_context = None
            continue
        m = label_def_re.match(line)
        if m:
            defs[m.group("name")] = (ln, m.group("id"))
            macro_context = None
            continue
        m = macro_def_re.match(line)
        if m:
            macro_context = m.group("name")
            continue
        if macro_context is not None:
            for ref in label_ref_re.finditer(line):
                key = (macro_context, ref.group("name"))
                fixups.setdefault(key, set()).add(ln)

    # each expansion for a macro-with-label-ref must be uniformly before or
    # after each label it references for our single fixup at the macro
    # definition to work
    for name, blocks in macro_exps.items():
        # span containing all expansions for this macro
        exp_lns = [defs[block][0] for block in blocks]
        first_exp_ln, last_exp_ln = min(exp_lns), max(exp_lns)

        for label in macro_defs[name]:
            label_ln, label_id = defs[label]

            if first_exp_ln <= label_ln <= last_exp_ln:
                print(
                    f"macro_fixup_pass: macro ({name}): Unable to fixup label "
                    f"ref ({label}) as all macro expansions are not uniformly "
                    "before or after the label definition"
                )
                continue

            direction = "Before" if label_ln < first_exp_ln else "After"

            # rewrite the macro definitions to use the correct label target
            #   ... Label!("name", -1, _)
            #    -> Label!("name", id, Before|After)
            pat = f'Label!("{label}", -1, Before)'
            rep = f'Label!("{label}", {label_id}, {direction})'
            # a macro whose definition carries no placeholder needs no rewrite
            for ln in fixups.get((name, label), ()):
                lines[ln] = lines[ln].replace(pat, rep)

    fixed = "\n".join(lines)
    # keep the final newline that splitlines() dropped
    if text.endswith("\n"):
        fixed += "\n"
    rust_output.write(fixed)
n0 { + () => { + "x3" + }; +} +macro_rules! n1 { + () => { + "x4" + }; +} +macro_rules! t0 { + () => { + "x5" + }; +} +macro_rules! t1 { + () => { + "x6" + }; +} +macro_rules! t2 { + () => { + "x7" + }; +} + +// Loading large constants + +macro_rules! movbig { + ($nn:expr, $n3:expr, $n2:expr, $n1:expr, $n0:expr) => { Q!( + "movz " $nn ", " $n0 ";\n" + "movk " $nn ", " $n1 ", lsl #16;\n" + "movk " $nn ", " $n2 ", lsl #32;\n" + "movk " $nn ", " $n3 ", lsl #48" + )} +} + +// Single round of modular reduction mod_n25519, mapping +// [m4;m3;m2;m1;m0] = m to [m3;m2;m1;m0] = m mod n_25519, +// *assuming* the input m < 2^64 * n_25519. This is very +// close to the loop body of the bignum_mod_n25519 function. + +macro_rules! reduce { + ($m4:expr, $m3:expr, $m2:expr, $m1:expr, $m0:expr) => { Q!( + "extr " q!() ", " $m4 ", " $m3 ", #60;\n" + "and " $m3 ", " $m3 ", #0x0FFFFFFFFFFFFFFF;\n" + "sub " q!() ", " q!() ", " $m4 ", lsr #60;\n" + "and " t0!() ", " $m4 ", #0xF000000000000000;\n" + "add " $m3 ", " $m3 ", " t0!() ";\n" + "mul " t0!() ", " n0!() ", " q!() ";\n" + "mul " t1!() ", " n1!() ", " q!() ";\n" + "umulh " t2!() ", " n0!() ", " q!() ";\n" + "adds " t1!() ", " t1!() ", " t2!() ";\n" + "umulh " t2!() ", " n1!() ", " q!() ";\n" + "adc " t2!() ", " t2!() ", xzr;\n" + "subs " $m0 ", " $m0 ", " t0!() ";\n" + "sbcs " $m1 ", " $m1 ", " t1!() ";\n" + "sbcs " $m2 ", " $m2 ", " t2!() ";\n" + "sbcs " $m3 ", " $m3 ", xzr;\n" + "csel " t0!() ", " n0!() ", xzr, cc;\n" + "csel " t1!() ", " n1!() ", xzr, cc;\n" + "adds " $m0 ", " $m0 ", " t0!() ";\n" + "and " t2!() ", " t0!() ", #0x1000000000000000;\n" + "adcs " $m1 ", " $m1 ", " t1!() ";\n" + "adcs " $m2 ", " $m2 ", xzr;\n" + "adc " $m3 ", " $m3 ", " t2!() + )} +} + +// Special case of "reduce" with m4 = 0. As well as not using m4, +// the quotient selection is slightly simpler, just floor(m/2^252) +// versus min (floor(m/2^252)) (2^63-1). + +macro_rules! 
reduce0 { + ($m3:expr, $m2:expr, $m1:expr, $m0:expr) => { Q!( + "lsr " q!() ", " $m3 ", #60;\n" + "and " $m3 ", " $m3 ", #0x0FFFFFFFFFFFFFFF;\n" + "mul " t0!() ", " n0!() ", " q!() ";\n" + "mul " t1!() ", " n1!() ", " q!() ";\n" + "umulh " t2!() ", " n0!() ", " q!() ";\n" + "adds " t1!() ", " t1!() ", " t2!() ";\n" + "umulh " t2!() ", " n1!() ", " q!() ";\n" + "adc " t2!() ", " t2!() ", xzr;\n" + "subs " $m0 ", " $m0 ", " t0!() ";\n" + "sbcs " $m1 ", " $m1 ", " t1!() ";\n" + "sbcs " $m2 ", " $m2 ", " t2!() ";\n" + "sbcs " $m3 ", " $m3 ", xzr;\n" + "csel " t0!() ", " n0!() ", xzr, cc;\n" + "csel " t1!() ", " n1!() ", xzr, cc;\n" + "adds " $m0 ", " $m0 ", " t0!() ";\n" + "and " t2!() ", " t0!() ", #0x1000000000000000;\n" + "adcs " $m1 ", " $m1 ", " t1!() ";\n" + "adcs " $m2 ", " $m2 ", xzr;\n" + "adc " $m3 ", " $m3 ", " t2!() + )} +} + +/// Multiply-add modulo the order of the curve25519/edwards25519 basepoint +/// +/// Inputs x[4], y[4], c[4]; output z[4] +/// +/// Performs z := (x * y + c) mod n_25519, where the modulus is +/// n_25519 = 2^252 + 27742317777372353535851937790883648493, the +/// order of the curve25519/edwards25519 basepoint. The result z +/// and the inputs x, y and c are all 4 digits (256 bits). +pub(crate) fn bignum_madd_n25519(z: &mut [u64; 4], x: &[u64; 4], y: &[u64; 4], c: &[u64; 4]) { + // SAFETY: inline assembly. see [crate::low::inline_assembly_safety] for safety info. + unsafe { + core::arch::asm!( + + + Q!(" stp " "x19, x20, [sp, -16] !"), + + // Back up the result pointer so we can overwrite x0 in intermediate steps + + Q!(" mov " z!() ", x0"), + + // First compute [x15;x14;x13;x12;x11;x10;x9;x8] = x * y + c. This + // is a basic schoolbook multiplier similar to the start of + // bignum_mul_p25519_alt except for different registers, but it + // also adds in the c term after the first row accumulation. 
+ + Q!(" ldp " "x13, x14, [x1]"), + Q!(" ldp " "x7, x0, [x2]"), + Q!(" mul " "x8, x13, x7"), + Q!(" umulh " "x9, x13, x7"), + Q!(" mul " "x16, x13, x0"), + Q!(" umulh " "x10, x13, x0"), + Q!(" adds " "x9, x9, x16"), + Q!(" ldp " "x4, x5, [x2, #16]"), + Q!(" mul " "x16, x13, x4"), + Q!(" umulh " "x11, x13, x4"), + Q!(" adcs " "x10, x10, x16"), + Q!(" mul " "x16, x13, x5"), + Q!(" umulh " "x12, x13, x5"), + Q!(" adcs " "x11, x11, x16"), + Q!(" adc " "x12, x12, xzr"), + Q!(" ldp " "x15, x6, [x3]"), + Q!(" adds " "x8, x8, x15"), + Q!(" adcs " "x9, x9, x6"), + Q!(" ldp " "x15, x6, [x3, #16]"), + Q!(" adcs " "x10, x10, x15"), + Q!(" adcs " "x11, x11, x6"), + Q!(" adc " "x12, x12, xzr"), + Q!(" ldp " "x15, x6, [x1, #16]"), + Q!(" mul " "x16, x14, x7"), + Q!(" adds " "x9, x9, x16"), + Q!(" mul " "x16, x14, x0"), + Q!(" adcs " "x10, x10, x16"), + Q!(" mul " "x16, x14, x4"), + Q!(" adcs " "x11, x11, x16"), + Q!(" mul " "x16, x14, x5"), + Q!(" adcs " "x12, x12, x16"), + Q!(" umulh " "x13, x14, x5"), + Q!(" adc " "x13, x13, xzr"), + Q!(" umulh " "x16, x14, x7"), + Q!(" adds " "x10, x10, x16"), + Q!(" umulh " "x16, x14, x0"), + Q!(" adcs " "x11, x11, x16"), + Q!(" umulh " "x16, x14, x4"), + Q!(" adcs " "x12, x12, x16"), + Q!(" adc " "x13, x13, xzr"), + Q!(" mul " "x16, x15, x7"), + Q!(" adds " "x10, x10, x16"), + Q!(" mul " "x16, x15, x0"), + Q!(" adcs " "x11, x11, x16"), + Q!(" mul " "x16, x15, x4"), + Q!(" adcs " "x12, x12, x16"), + Q!(" mul " "x16, x15, x5"), + Q!(" adcs " "x13, x13, x16"), + Q!(" umulh " "x14, x15, x5"), + Q!(" adc " "x14, x14, xzr"), + Q!(" umulh " "x16, x15, x7"), + Q!(" adds " "x11, x11, x16"), + Q!(" umulh " "x16, x15, x0"), + Q!(" adcs " "x12, x12, x16"), + Q!(" umulh " "x16, x15, x4"), + Q!(" adcs " "x13, x13, x16"), + Q!(" adc " "x14, x14, xzr"), + Q!(" mul " "x16, x6, x7"), + Q!(" adds " "x11, x11, x16"), + Q!(" mul " "x16, x6, x0"), + Q!(" adcs " "x12, x12, x16"), + Q!(" mul " "x16, x6, x4"), + Q!(" adcs " "x13, x13, x16"), + Q!(" mul " "x16, x6, 
x5"), + Q!(" adcs " "x14, x14, x16"), + Q!(" umulh " "x15, x6, x5"), + Q!(" adc " "x15, x15, xzr"), + Q!(" umulh " "x16, x6, x7"), + Q!(" adds " "x12, x12, x16"), + Q!(" umulh " "x16, x6, x0"), + Q!(" adcs " "x13, x13, x16"), + Q!(" umulh " "x16, x6, x4"), + Q!(" adcs " "x14, x14, x16"), + Q!(" adc " "x15, x15, xzr"), + + // Now do the modular reduction and write back + + movbig!(n0!(), "#0x5812", "#0x631a", "#0x5cf5", "#0xd3ed"), + movbig!(n1!(), "#0x14de", "#0xf9de", "#0xa2f7", "#0x9cd6"), + + reduce0!("x15", "x14", "x13", "x12"), + reduce!("x15", "x14", "x13", "x12", "x11"), + reduce!("x14", "x13", "x12", "x11", "x10"), + reduce!("x13", "x12", "x11", "x10", "x9"), + reduce!("x12", "x11", "x10", "x9", "x8"), + + Q!(" stp " "x8, x9, [" z!() "]"), + Q!(" stp " "x10, x11, [" z!() ", #16]"), + + // Restore registers and return + + Q!(" ldp " "x19, x20, [sp], 16"), + inout("x0") z.as_mut_ptr() => _, + inout("x1") x.as_ptr() => _, + inout("x2") y.as_ptr() => _, + inout("x3") c.as_ptr() => _, + // clobbers + out("x10") _, + out("x11") _, + out("x12") _, + out("x13") _, + out("x14") _, + out("x15") _, + out("x16") _, + out("x20") _, + out("x4") _, + out("x5") _, + out("x6") _, + out("x7") _, + out("x8") _, + out("x9") _, + ) + }; +} diff --git a/graviola/src/low/aarch64/bignum_mod_n25519.rs b/graviola/src/low/aarch64/bignum_mod_n25519.rs new file mode 100644 index 000000000..a1304917b --- /dev/null +++ b/graviola/src/low/aarch64/bignum_mod_n25519.rs @@ -0,0 +1,273 @@ +// generated source. do not edit. +#![allow(non_upper_case_globals, unused_macros, unused_imports)] +use crate::low::macros::*; + +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Reduce modulo basepoint order, z := x mod n_25519 +// Input x[k]; output z[4] +// +// extern void bignum_mod_n25519(uint64_t z[static 4], uint64_t k, +// const uint64_t *x); +// +// Reduction is modulo the order of the curve25519/edwards25519 basepoint, +// which is n_25519 = 2^252 + 27742317777372353535851937790883648493 +// +// Standard ARM ABI: X0 = z, X1 = k, X2 = x +// ---------------------------------------------------------------------------- + +macro_rules! z { + () => { + "x0" + }; +} +macro_rules! k { + () => { + "x1" + }; +} +macro_rules! x { + () => { + "x2" + }; +} + +macro_rules! m0 { + () => { + "x3" + }; +} +macro_rules! m1 { + () => { + "x4" + }; +} +macro_rules! m2 { + () => { + "x5" + }; +} +macro_rules! m3 { + () => { + "x6" + }; +} + +macro_rules! t0 { + () => { + "x7" + }; +} +macro_rules! t1 { + () => { + "x8" + }; +} +macro_rules! t2 { + () => { + "x9" + }; +} +macro_rules! t3 { + () => { + "x10" + }; +} + +macro_rules! n0 { + () => { + "x11" + }; +} +macro_rules! n1 { + () => { + "x12" + }; +} + +// These two are aliased: we only load d when finished with q + +macro_rules! q { + () => { + "x13" + }; +} +macro_rules! d { + () => { + "x13" + }; +} + +// Loading large constants + +macro_rules! movbig { + ($nn:expr, $n3:expr, $n2:expr, $n1:expr, $n0:expr) => { Q!( + "movz " $nn ", " $n0 ";\n" + "movk " $nn ", " $n1 ", lsl #16;\n" + "movk " $nn ", " $n2 ", lsl #32;\n" + "movk " $nn ", " $n3 ", lsl #48" + )} +} + +/// Reduce modulo basepoint order, z := x mod n_25519 +/// +/// Input x[k]; output z[4] +/// +/// Reduction is modulo the order of the curve25519/edwards25519 basepoint, +/// which is n_25519 = 2^252 + 27742317777372353535851937790883648493 +pub(crate) fn bignum_mod_n25519(z: &mut [u64; 4], x: &[u64]) { + // SAFETY: inline assembly. see [crate::low::inline_assembly_safety] for safety info. 
+ unsafe { + core::arch::asm!( + + + // If the input is already <= 3 words long, go to a trivial "copy" path + + Q!(" cmp " k!() ", #4"), + Q!(" bcc " Label!("bignum_mod_n25519_short", 2, After)), + + // Otherwise load the top 4 digits (top-down) and reduce k by 4 + // This [m3;m2;m1;m0] is the initial x where we begin reduction. + + Q!(" sub " k!() ", " k!() ", #4"), + Q!(" lsl " t0!() ", " k!() ", #3"), + Q!(" add " t0!() ", " t0!() ", " x!()), + Q!(" ldp " m2!() ", " m3!() ", [" t0!() ", #16]"), + Q!(" ldp " m0!() ", " m1!() ", [" t0!() "]"), + + // Load the complicated two words of n_25519 = 2^252 + [n1; n0] + + movbig!(n0!(), "#0x5812", "#0x631a", "#0x5cf5", "#0xd3ed"), + movbig!(n1!(), "#0x14de", "#0xf9de", "#0xa2f7", "#0x9cd6"), + + // Get the quotient estimate q = floor(x/2^252). + // Also delete it from m3, in effect doing x' = x - q * 2^252 + + Q!(" lsr " q!() ", " m3!() ", #60"), + Q!(" and " m3!() ", " m3!() ", #0x0FFFFFFFFFFFFFFF"), + + // Multiply [t2;t1;t0] = q * [n1;n0] + + Q!(" mul " t0!() ", " n0!() ", " q!()), + Q!(" mul " t1!() ", " n1!() ", " q!()), + Q!(" umulh " t2!() ", " n0!() ", " q!()), + Q!(" adds " t1!() ", " t1!() ", " t2!()), + Q!(" umulh " t2!() ", " n1!() ", " q!()), + Q!(" adc " t2!() ", " t2!() ", xzr"), + + // Subtract [m3;m2;m1;m0] = x' - q * [n1;n0] = x - q * n_25519 + + Q!(" subs " m0!() ", " m0!() ", " t0!()), + Q!(" sbcs " m1!() ", " m1!() ", " t1!()), + Q!(" sbcs " m2!() ", " m2!() ", " t2!()), + Q!(" sbcs " m3!() ", " m3!() ", xzr"), + + // If this borrows (CF = 0 because of inversion), add back n_25519. + // The masked n3 digit exploits the fact that bit 60 of n0 is set. 
+ + Q!(" csel " t0!() ", " n0!() ", xzr, cc"), + Q!(" csel " t1!() ", " n1!() ", xzr, cc"), + Q!(" adds " m0!() ", " m0!() ", " t0!()), + Q!(" adcs " m1!() ", " m1!() ", " t1!()), + Q!(" and " t0!() ", " t0!() ", #0x1000000000000000"), + Q!(" adcs " m2!() ", " m2!() ", xzr"), + Q!(" adc " m3!() ", " m3!() ", " t0!()), + + // Now do (k-4) iterations of 5->4 word modular reduction. Each one + // is similar to the sequence above except for the more refined quotient + // estimation process. + + Q!(" cbz " k!() ", " Label!("bignum_mod_n25519_writeback", 3, After)), + + Q!(Label!("bignum_mod_n25519_loop", 4) ":"), + + // Assume that the new 5-digit x is 2^64 * previous_x + next_digit. + // Get the quotient estimate q = max (floor(x/2^252)) (2^64 - 1) + // and first compute x' = x - 2^252 * q. + + Q!(" extr " q!() ", " m3!() ", " m2!() ", #60"), + Q!(" and " m2!() ", " m2!() ", #0x0FFFFFFFFFFFFFFF"), + Q!(" sub " q!() ", " q!() ", " m3!() ", lsr #60"), + Q!(" and " m3!() ", " m3!() ", #0xF000000000000000"), + Q!(" add " m2!() ", " m2!() ", " m3!()), + + // Multiply [t2;t1;t0] = q * [n1;n0] + + Q!(" mul " t0!() ", " n0!() ", " q!()), + Q!(" mul " t1!() ", " n1!() ", " q!()), + Q!(" umulh " t2!() ", " n0!() ", " q!()), + Q!(" adds " t1!() ", " t1!() ", " t2!()), + Q!(" umulh " t2!() ", " n1!() ", " q!()), + Q!(" adc " t2!() ", " t2!() ", xzr"), + + // Decrement k and load the next digit (note that d aliases to q) + + Q!(" sub " k!() ", " k!() ", #1"), + Q!(" ldr " d!() ", [" x!() ", " k!() ", lsl #3]"), + + // Subtract [t3;t2;t1;t0] = x' - q * [n1;n0] = x - q * n_25519 + + Q!(" subs " t0!() ", " d!() ", " t0!()), + Q!(" sbcs " t1!() ", " m0!() ", " t1!()), + Q!(" sbcs " t2!() ", " m1!() ", " t2!()), + Q!(" sbcs " t3!() ", " m2!() ", xzr"), + + // If this borrows (CF = 0 because of inversion), add back n_25519. + // The masked n3 digit exploits the fact that bit 60 of n1 is set. 
+ + Q!(" csel " m0!() ", " n0!() ", xzr, cc"), + Q!(" csel " m1!() ", " n1!() ", xzr, cc"), + Q!(" adds " m0!() ", " t0!() ", " m0!()), + Q!(" and " m3!() ", " m1!() ", #0x1000000000000000"), + Q!(" adcs " m1!() ", " t1!() ", " m1!()), + Q!(" adcs " m2!() ", " t2!() ", xzr"), + Q!(" adc " m3!() ", " t3!() ", " m3!()), + + Q!(" cbnz " k!() ", " Label!("bignum_mod_n25519_loop", 4, Before)), + + // Finally write back [m3;m2;m1;m0] and return + + Q!(Label!("bignum_mod_n25519_writeback", 3) ":"), + Q!(" stp " m0!() ", " m1!() ", [" z!() "]"), + Q!(" stp " m2!() ", " m3!() ", [" z!() ", #16]"), + // linear hoisting in -> b after bignum_mod_n25519_short + Q!(" b " Label!("hoist_finish", 5, After)), + + // Short case: just copy the input with zero-padding + + Q!(Label!("bignum_mod_n25519_short", 2) ":"), + Q!(" mov " m0!() ", xzr"), + Q!(" mov " m1!() ", xzr"), + Q!(" mov " m2!() ", xzr"), + Q!(" mov " m3!() ", xzr"), + + Q!(" cbz " k!() ", " Label!("bignum_mod_n25519_writeback", 3, Before)), + Q!(" ldr " m0!() ", [" x!() "]"), + Q!(" subs " k!() ", " k!() ", #1"), + Q!(" beq " Label!("bignum_mod_n25519_writeback", 3, Before)), + Q!(" ldr " m1!() ", [" x!() ", #8]"), + Q!(" subs " k!() ", " k!() ", #1"), + Q!(" beq " Label!("bignum_mod_n25519_writeback", 3, Before)), + Q!(" ldr " m2!() ", [" x!() ", #16]"), + Q!(" b " Label!("bignum_mod_n25519_writeback", 3, Before)), + Q!(Label!("hoist_finish", 5) ":"), + inout("x0") z.as_mut_ptr() => _, + inout("x1") x.len() => _, + inout("x2") x.as_ptr() => _, + // clobbers + out("x10") _, + out("x11") _, + out("x12") _, + out("x13") _, + out("x3") _, + out("x4") _, + out("x5") _, + out("x6") _, + out("x7") _, + out("x8") _, + out("x9") _, + ) + }; +} diff --git a/graviola/src/low/aarch64/bignum_neg_p25519.rs b/graviola/src/low/aarch64/bignum_neg_p25519.rs new file mode 100644 index 000000000..78b150a82 --- /dev/null +++ b/graviola/src/low/aarch64/bignum_neg_p25519.rs @@ -0,0 +1,108 @@ +// generated source. do not edit. 
+#![allow(non_upper_case_globals, unused_macros, unused_imports)] +use crate::low::macros::*; + +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Negate modulo p_25519, z := (-x) mod p_25519, assuming x reduced +// Input x[4]; output z[4] +// +// extern void bignum_neg_p25519(uint64_t z[static 4], const uint64_t x[static 4]); +// +// Standard ARM ABI: X0 = z, X1 = x +// ---------------------------------------------------------------------------- + +macro_rules! z { + () => { + "x0" + }; +} +macro_rules! x { + () => { + "x1" + }; +} + +macro_rules! d0 { + () => { + "x2" + }; +} +macro_rules! d1 { + () => { + "x3" + }; +} +macro_rules! d2 { + () => { + "x4" + }; +} +macro_rules! d3 { + () => { + "x5" + }; +} +macro_rules! c { + () => { + "x6" + }; +} +macro_rules! d { + () => { + "x7" + }; +} + +/// Negate modulo p_25519, z := (-x) mod p_25519, assuming x reduced (x < p_25519 = 2^255 - 19) +/// +/// Input x[4]; output z[4] +pub(crate) fn bignum_neg_p25519(z: &mut [u64; 4], x: &[u64; 4]) { + // SAFETY: inline assembly. see [crate::low::inline_assembly_safety] for safety info.
+ unsafe { + core::arch::asm!( + + + // Load the digits of x and compute [d3;d2;d1;d0] = (2^255 - 19) - x + // while also computing c = the OR of the digits of x + + Q!(" ldp " d0!() ", " d1!() ", [" x!() "]"), + Q!(" mov " d!() ", #-19"), + Q!(" orr " c!() ", " d0!() ", " d1!()), + Q!(" subs " d0!() ", " d!() ", " d0!()), + Q!(" mov " d!() ", #-1"), + Q!(" sbcs " d1!() ", " d!() ", " d1!()), + Q!(" ldp " d2!() ", " d3!() ", [" x!() ", #16]"), + Q!(" orr " c!() ", " c!() ", " d2!()), + Q!(" sbcs " d2!() ", " d!() ", " d2!()), + Q!(" mov " d!() ", #0x7FFFFFFFFFFFFFFF"), + Q!(" orr " c!() ", " c!() ", " d3!()), + Q!(" sbc " d3!() ", " d!() ", " d3!()), + + // If in fact c = 0 (i.e. x = 0) then the result is zero rather than p_25519, otherwise the main result + + Q!(" cmp " c!() ", xzr"), + Q!(" csel " d0!() ", " d0!() ", xzr, ne"), + Q!(" csel " d1!() ", " d1!() ", xzr, ne"), + Q!(" csel " d2!() ", " d2!() ", xzr, ne"), + Q!(" csel " d3!() ", " d3!() ", xzr, ne"), + + // Write back result (inline version: there is no ret, control falls out of the asm block) + + Q!(" stp " d0!() ", " d1!() ", [" z!() "]"), + Q!(" stp " d2!() ", " d3!() ", [" z!() ", #16]"), + + inout("x0") z.as_mut_ptr() => _, + inout("x1") x.as_ptr() => _, + // clobbers + out("x2") _, + out("x3") _, + out("x4") _, + out("x5") _, + out("x6") _, + out("x7") _, + ) + }; +} diff --git a/graviola/src/low/aarch64/edwards25519_decode.rs b/graviola/src/low/aarch64/edwards25519_decode.rs new file mode 100644 index 000000000..e109098c5 --- /dev/null +++ b/graviola/src/low/aarch64/edwards25519_decode.rs @@ -0,0 +1,637 @@ +// generated source. do not edit. +#![allow(non_upper_case_globals, unused_macros, unused_imports)] +use crate::low::macros::*; + +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Decode compressed 256-bit form of edwards25519 point +// Input c[32] (bytes); output function return and z[8] +// +// extern uint64_t edwards25519_decode_alt(uint64_t z[static 8], const uint8_t c[static 32]); +// +// This interprets the input byte string as a little-endian number +// representing a point (x,y) on the edwards25519 curve, encoded as +// 2^255 * x_0 + y where x_0 is the least significant bit of x. It +// returns the full pair of coordinates x (at z) and y (at z+4). The +// return code is 0 for success and 1 for failure, which means that +// the input does not correspond to the encoding of any edwards25519 +// point. This can happen for three reasons, where y = the lowest +// 255 bits of the input: +// +// * y >= p_25519 +// Input y coordinate is not reduced +// * (y^2 - 1) * (1 + d_25519 * y^2) has no modular square root +// There is no x such that (x,y) is on the curve +// * y^2 = 1 and top bit of input is set +// Cannot be the canonical encoding of (0,1) or (0,-1) +// +// Standard ARM ABI: X0 = z, X1 = c +// ---------------------------------------------------------------------------- + +// Size in bytes of a 64-bit word + +macro_rules! N { + () => { + "8" + }; +} + +// Pointer-offset pairs for temporaries on stack + +macro_rules! y { + () => { + "sp, #0" + }; +} +macro_rules! s { () => { Q!("sp, # (4 * " N!() ")") } } +macro_rules! t { () => { Q!("sp, # (8 * " N!() ")") } } +macro_rules! u { () => { Q!("sp, # (12 * " N!() ")") } } +macro_rules! v { () => { Q!("sp, # (16 * " N!() ")") } } +macro_rules! w { () => { Q!("sp, # (20 * " N!() ")") } } + +// Other temporary variables in register + +macro_rules! res { + () => { + "x19" + }; +} +macro_rules! sgnbit { + () => { + "x20" + }; +} +macro_rules! badun { + () => { + "x21" + }; +} + +// Total size to reserve on the stack + +macro_rules!
NSPACE { () => { Q!("# (24 * " N!() ")") } } + +// Loading large constants + +macro_rules! movbig { + ($nn:expr, $n3:expr, $n2:expr, $n1:expr, $n0:expr) => { Q!( + "movz " $nn ", " $n0 ";\n" + "movk " $nn ", " $n1 ", lsl #16;\n" + "movk " $nn ", " $n2 ", lsl #32;\n" + "movk " $nn ", " $n3 ", lsl #48" + )} +} + +// Macros wrapping up calls to the local subroutines + +macro_rules! mulp { + ($dest:expr, $src1:expr, $src2:expr) => { Q!( + "add x0, " $dest ";\n" + "add x1, " $src1 ";\n" + "add x2, " $src2 ";\n" + "bl " Label!("edwards25519_decode_alt_mul_p25519", 3, After) + )} +} + +macro_rules! nsqr { + ($dest:expr, $n:expr, $src:expr) => { Q!( + "add x0, " $dest ";\n" + "mov x1, " $n ";\n" + "add x2, " $src ";\n" + "bl " Label!("edwards25519_decode_alt_nsqr_p25519", 4, After) + )} +} + +/// Decode compressed 256-bit form of edwards25519 point +/// +/// Input c[32] (bytes); output z[8] and function return (`true` iff the asm return code is 0) +/// +/// This interprets the input byte string as a little-endian number +/// representing a point (x,y) on the edwards25519 curve, encoded as +/// 2^255 * x_0 + y where x_0 is the least significant bit of x. It +/// returns the full pair of coordinates x (at z) and y (at z+4). The +/// return code is 0 for success and 1 for failure, which means that +/// the input does not correspond to the encoding of any edwards25519 +/// point. This can happen for three reasons, where y = the lowest +/// 255 bits of the input: +/// +/// * y >= p_25519 +/// Input y coordinate is not reduced +/// * (y^2 - 1) * (1 + d_25519 * y^2) has no modular square root +/// There is no x such that (x,y) is on the curve +/// * y^2 = 1 and top bit of input is set +/// Cannot be the canonical encoding of (0,1) or (0,-1) +pub(crate) fn edwards25519_decode(z: &mut [u64; 8], c: &[u8; 32]) -> bool { + let ret: u64; + // SAFETY: inline assembly. see [crate::low::inline_assembly_safety] for safety info.
+ unsafe { + core::arch::asm!( + + + // Save registers and make room for temporaries + + Q!(" stp " "x19, x20, [sp, -16] !"), + Q!(" stp " "x21, x30, [sp, -16] !"), + Q!(" sub " "sp, sp, " NSPACE!()), + + // Save the return pointer for the end so we can overwrite x0 later + + Q!(" mov " res!() ", x0"), + + // Load the inputs, using byte operations in case of big-endian setting. + // Let y be the lowest 255 bits of the input and sgnbit the desired parity. + // If y >= p_25519 then already flag the input as invalid (badun = 1). + + Q!(" ldrb " "w0, [x1]"), + Q!(" lsl " "x4, x0, #56"), + Q!(" ldrb " "w0, [x1, #1]"), + Q!(" extr " "x4, x0, x4, #8"), + Q!(" ldrb " "w0, [x1, #2]"), + Q!(" extr " "x4, x0, x4, #8"), + Q!(" ldrb " "w0, [x1, #3]"), + Q!(" extr " "x4, x0, x4, #8"), + Q!(" ldrb " "w0, [x1, #4]"), + Q!(" extr " "x4, x0, x4, #8"), + Q!(" ldrb " "w0, [x1, #5]"), + Q!(" extr " "x4, x0, x4, #8"), + Q!(" ldrb " "w0, [x1, #6]"), + Q!(" extr " "x4, x0, x4, #8"), + Q!(" ldrb " "w0, [x1, #7]"), + Q!(" extr " "x4, x0, x4, #8"), + + Q!(" ldrb " "w0, [x1, #8]"), + Q!(" lsl " "x5, x0, #56"), + Q!(" ldrb " "w0, [x1, #9]"), + Q!(" extr " "x5, x0, x5, #8"), + Q!(" ldrb " "w0, [x1, #10]"), + Q!(" extr " "x5, x0, x5, #8"), + Q!(" ldrb " "w0, [x1, #11]"), + Q!(" extr " "x5, x0, x5, #8"), + Q!(" ldrb " "w0, [x1, #12]"), + Q!(" extr " "x5, x0, x5, #8"), + Q!(" ldrb " "w0, [x1, #13]"), + Q!(" extr " "x5, x0, x5, #8"), + Q!(" ldrb " "w0, [x1, #14]"), + Q!(" extr " "x5, x0, x5, #8"), + Q!(" ldrb " "w0, [x1, #15]"), + Q!(" extr " "x5, x0, x5, #8"), + + Q!(" ldrb " "w0, [x1, #16]"), + Q!(" lsl " "x6, x0, #56"), + Q!(" ldrb " "w0, [x1, #17]"), + Q!(" extr " "x6, x0, x6, #8"), + Q!(" ldrb " "w0, [x1, #18]"), + Q!(" extr " "x6, x0, x6, #8"), + Q!(" ldrb " "w0, [x1, #19]"), + Q!(" extr " "x6, x0, x6, #8"), + Q!(" ldrb " "w0, [x1, #20]"), + Q!(" extr " "x6, x0, x6, #8"), + Q!(" ldrb " "w0, [x1, #21]"), + Q!(" extr " "x6, x0, x6, #8"), + Q!(" ldrb " "w0, [x1, #22]"), + Q!(" extr " "x6, x0,
x6, #8"), + Q!(" ldrb " "w0, [x1, #23]"), + Q!(" extr " "x6, x0, x6, #8"), + + Q!(" ldrb " "w0, [x1, #24]"), + Q!(" lsl " "x7, x0, #56"), + Q!(" ldrb " "w0, [x1, #25]"), + Q!(" extr " "x7, x0, x7, #8"), + Q!(" ldrb " "w0, [x1, #26]"), + Q!(" extr " "x7, x0, x7, #8"), + Q!(" ldrb " "w0, [x1, #27]"), + Q!(" extr " "x7, x0, x7, #8"), + Q!(" ldrb " "w0, [x1, #28]"), + Q!(" extr " "x7, x0, x7, #8"), + Q!(" ldrb " "w0, [x1, #29]"), + Q!(" extr " "x7, x0, x7, #8"), + Q!(" ldrb " "w0, [x1, #30]"), + Q!(" extr " "x7, x0, x7, #8"), + Q!(" ldrb " "w0, [x1, #31]"), + Q!(" extr " "x7, x0, x7, #8"), + + Q!(" stp " "x4, x5, [" y!() "]"), + Q!(" lsr " sgnbit!() ", x7, #63"), + Q!(" and " "x7, x7, #0x7FFFFFFFFFFFFFFF"), + Q!(" stp " "x6, x7, [" y!() "+ 16]"), + + Q!(" adds " "xzr, x4, #19"), + Q!(" adcs " "xzr, x5, xzr"), + Q!(" adcs " "xzr, x6, xzr"), + Q!(" adcs " "xzr, x7, xzr"), + Q!(" cset " badun!() ", mi"), + + // u = y^2 - 1 (actually y^2 + 2^255 - 20, not reduced modulo) + // v = 1 + d * y^2 (not reduced modulo from the +1) + // w = u * v + + nsqr!(v!(), "1", y!()), + Q!(" ldp " "x0, x1, [" v!() "]"), + Q!(" ldp " "x2, x3, [" v!() "+ 16]"), + Q!(" mov " "x4, #0x8000000000000000"), + Q!(" subs " "x0, x0, #20"), + Q!(" sbcs " "x1, x1, xzr"), + Q!(" sbcs " "x2, x2, xzr"), + Q!(" sbc " "x3, x3, x4"), + Q!(" stp " "x0, x1, [" u!() "]"), + Q!(" stp " "x2, x3, [" u!() "+ 16]"), + + movbig!("x0", "#0x75eb", "#0x4dca", "#0x1359", "#0x78a3"), + movbig!("x1", "#0x0070", "#0x0a4d", "#0x4141", "#0xd8ab"), + movbig!("x2", "#0x8cc7", "#0x4079", "#0x7779", "#0xe898"), + movbig!("x3", "#0x5203", "#0x6cee", "#0x2b6f", "#0xfe73"), + Q!(" stp " "x0, x1, [" w!() "]"), + Q!(" stp " "x2, x3, [" w!() "+ 16]"), + mulp!(v!(), w!(), v!()), + Q!(" ldp " "x0, x1, [" v!() "]"), + Q!(" ldp " "x2, x3, [" v!() "+ 16]"), + Q!(" adds " "x0, x0, #1"), + Q!(" adcs " "x1, x1, xzr"), + Q!(" adcs " "x2, x2, xzr"), + Q!(" adcs " "x3, x3, xzr"), + Q!(" stp " "x0, x1, [" v!() "]"), + Q!(" stp " "x2, x3, [" v!() "+
16]"), + + mulp!(w!(), u!(), v!()), + + // Get s = w^{2^252-3} as a candidate inverse square root 1/sqrt(w). + // This power tower computation is the same as bignum_invsqrt_p25519 + + nsqr!(t!(), "1", w!()), + mulp!(t!(), t!(), w!()), + nsqr!(s!(), "2", t!()), + mulp!(t!(), s!(), t!()), + nsqr!(s!(), "1", t!()), + mulp!(v!(), s!(), w!()), + nsqr!(s!(), "5", v!()), + mulp!(t!(), s!(), v!()), + nsqr!(s!(), "10", t!()), + mulp!(t!(), s!(), t!()), + nsqr!(s!(), "5", t!()), + mulp!(v!(), s!(), v!()), + nsqr!(s!(), "25", v!()), + mulp!(t!(), s!(), v!()), + nsqr!(s!(), "50", t!()), + mulp!(t!(), s!(), t!()), + nsqr!(s!(), "25", t!()), + mulp!(v!(), s!(), v!()), + nsqr!(s!(), "125", v!()), + mulp!(v!(), s!(), v!()), + nsqr!(s!(), "2", v!()), + mulp!(s!(), s!(), w!()), + + // Compute v' = s^2 * w to discriminate whether the square root sqrt(u/v) + // exists, in which case we should get 0, 1 or -1. + + nsqr!(v!(), "1", s!()), + mulp!(v!(), v!(), w!()), + + // Get the two candidates for sqrt(u / v), one being s = u * w^{2^252-3} + // and the other being t = s * j_25519 where j_25519 = sqrt(-1). + + mulp!(s!(), u!(), s!()), + movbig!("x0", "#0xc4ee", "#0x1b27", "#0x4a0e", "#0xa0b0"), + movbig!("x1", "#0x2f43", "#0x1806", "#0xad2f", "#0xe478"), + movbig!("x2", "#0x2b4d", "#0x0099", "#0x3dfb", "#0xd7a7"), + movbig!("x3", "#0x2b83", "#0x2480", "#0x4fc1", "#0xdf0b"), + Q!(" stp " "x0, x1, [" t!() "]"), + Q!(" stp " "x2, x3, [" t!() "+ 16]"), + mulp!(t!(), s!(), t!()), + + // x4 = 0 <=> s^2 * w = 0 or 1 + + Q!(" ldp " "x0, x1, [" v!() "]"), + Q!(" ldp " "x2, x3, [" v!() "+ 16]"), + Q!(" bic " "x4, x0, #1"), + Q!(" orr " "x4, x4, x1"), + Q!(" orr " "x5, x2, x3"), + Q!(" orr " "x4, x4, x5"), + + // x0 = 0 <=> s^2 * w = -1 (mod p_25519, i.e.
s^2 * w = 2^255 - 20) + + Q!(" add " "x0, x0, #20"), + Q!(" add " "x1, x1, #1"), + Q!(" orr " "x0, x0, x1"), + Q!(" add " "x2, x2, #1"), + Q!(" eor " "x3, x3, #0x7FFFFFFFFFFFFFFF"), + Q!(" orr " "x2, x2, x3"), + Q!(" orr " "x0, x0, x2"), + + // If s^2 * w is not 0 or 1 then replace s by t + + Q!(" cmp " "x4, xzr"), + Q!(" ldp " "x10, x11, [" s!() "]"), + Q!(" ldp " "x14, x15, [" t!() "]"), + Q!(" csel " "x10, x10, x14, eq"), + Q!(" csel " "x11, x11, x15, eq"), + Q!(" ldp " "x12, x13, [" s!() "+ 16]"), + Q!(" ldp " "x16, x17, [" t!() "+ 16]"), + Q!(" csel " "x12, x12, x16, eq"), + Q!(" csel " "x13, x13, x17, eq"), + Q!(" stp " "x10, x11, [" s!() "]"), + Q!(" stp " "x12, x13, [" s!() "+ 16]"), + + // Check invalidity, occurring if s^2 * w is not in {0,1,-1} + + Q!(" ccmp " "x0, xzr, 4, ne"), + Q!(" cset " "x0, ne"), + Q!(" orr " badun!() ", " badun!() ", x0"), + + // Let [x3;x2;x1;x0] = s and [x7;x6;x5;x4] = p_25519 - s + + Q!(" ldp " "x0, x1, [" s!() "]"), + Q!(" ldp " "x2, x3, [" s!() "+ 16]"), + Q!(" mov " "x4, #-19"), + Q!(" subs " "x4, x4, x0"), + Q!(" mov " "x6, #-1"), + Q!(" sbcs " "x5, x6, x1"), + Q!(" sbcs " "x6, x6, x2"), + Q!(" mov " "x7, #0x7FFFFFFFFFFFFFFF"), + Q!(" sbc " "x7, x7, x3"), + + // Decide whether a flip is apparently indicated, s_0 <=> sgnbit + // Decide also if s = 0 by OR-ing its digits.
Now if a flip is indicated: + // - if s = 0 then mark as invalid + // - if s <> 0 then indeed flip + + Q!(" and " "x9, x0, #1"), + Q!(" eor " sgnbit!() ", x9, " sgnbit!()), + Q!(" orr " "x8, x0, x1"), + Q!(" orr " "x9, x2, x3"), + Q!(" orr " "x8, x8, x9"), + Q!(" orr " "x10, " badun!() ", " sgnbit!()), + Q!(" cmp " "x8, xzr"), + Q!(" csel " badun!() ", x10, " badun!() ", eq"), + Q!(" ccmp " sgnbit!() ", xzr, #4, ne"), + + // Actual selection of x as s or -s, copying of y and return of validity + + Q!(" csel " "x0, x0, x4, eq"), + Q!(" csel " "x1, x1, x5, eq"), + Q!(" csel " "x2, x2, x6, eq"), + Q!(" csel " "x3, x3, x7, eq"), + Q!(" ldp " "x8, x9, [" y!() "]"), + Q!(" ldp " "x10, x11, [" y!() "+ 16]"), + + Q!(" stp " "x0, x1, [" res!() "]"), + Q!(" stp " "x2, x3, [" res!() ", #16]"), + Q!(" stp " "x8, x9, [" res!() ", #32]"), + Q!(" stp " "x10, x11, [" res!() ", #48]"), + + Q!(" mov " "x0, " badun!()), + + // Restore stack and registers + + Q!(" add " "sp, sp, " NSPACE!()), + + Q!(" ldp " "x21, x30, [sp], 16"), + Q!(" ldp " "x19, x20, [sp], 16"), + // proc hoisting in -> ret after edwards25519_decode_alt_loop + Q!(" b " Label!("hoist_finish", 2, After)), + + // ************************************************************* + // Local z = x * y + // ************************************************************* + + Q!(Label!("edwards25519_decode_alt_mul_p25519", 3) ":"), + Q!(" ldp " "x3, x4, [x1]"), + Q!(" ldp " "x7, x8, [x2]"), + Q!(" mul " "x12, x3, x7"), + Q!(" umulh " "x13, x3, x7"), + Q!(" mul " "x11, x3, x8"), + Q!(" umulh " "x14, x3, x8"), + Q!(" adds " "x13, x13, x11"), + Q!(" ldp " "x9, x10, [x2, #16]"), + Q!(" mul " "x11, x3, x9"), + Q!(" umulh " "x15, x3, x9"), + Q!(" adcs " "x14, x14, x11"), + Q!(" mul " "x11, x3, x10"), + Q!(" umulh " "x16, x3, x10"), + Q!(" adcs " "x15, x15, x11"), + Q!(" adc " "x16, x16, xzr"), + Q!(" ldp " "x5, x6, [x1, #16]"), + Q!(" mul " "x11, x4, x7"), + Q!(" adds " "x13, x13, x11"), + Q!(" mul " "x11, x4, x8"), + Q!(" adcs " "x14,
x14, x11"), + Q!(" mul " "x11, x4, x9"), + Q!(" adcs " "x15, x15, x11"), + Q!(" mul " "x11, x4, x10"), + Q!(" adcs " "x16, x16, x11"), + Q!(" umulh " "x3, x4, x10"), + Q!(" adc " "x3, x3, xzr"), + Q!(" umulh " "x11, x4, x7"), + Q!(" adds " "x14, x14, x11"), + Q!(" umulh " "x11, x4, x8"), + Q!(" adcs " "x15, x15, x11"), + Q!(" umulh " "x11, x4, x9"), + Q!(" adcs " "x16, x16, x11"), + Q!(" adc " "x3, x3, xzr"), + Q!(" mul " "x11, x5, x7"), + Q!(" adds " "x14, x14, x11"), + Q!(" mul " "x11, x5, x8"), + Q!(" adcs " "x15, x15, x11"), + Q!(" mul " "x11, x5, x9"), + Q!(" adcs " "x16, x16, x11"), + Q!(" mul " "x11, x5, x10"), + Q!(" adcs " "x3, x3, x11"), + Q!(" umulh " "x4, x5, x10"), + Q!(" adc " "x4, x4, xzr"), + Q!(" umulh " "x11, x5, x7"), + Q!(" adds " "x15, x15, x11"), + Q!(" umulh " "x11, x5, x8"), + Q!(" adcs " "x16, x16, x11"), + Q!(" umulh " "x11, x5, x9"), + Q!(" adcs " "x3, x3, x11"), + Q!(" adc " "x4, x4, xzr"), + Q!(" mul " "x11, x6, x7"), + Q!(" adds " "x15, x15, x11"), + Q!(" mul " "x11, x6, x8"), + Q!(" adcs " "x16, x16, x11"), + Q!(" mul " "x11, x6, x9"), + Q!(" adcs " "x3, x3, x11"), + Q!(" mul " "x11, x6, x10"), + Q!(" adcs " "x4, x4, x11"), + Q!(" umulh " "x5, x6, x10"), + Q!(" adc " "x5, x5, xzr"), + Q!(" umulh " "x11, x6, x7"), + Q!(" adds " "x16, x16, x11"), + Q!(" umulh " "x11, x6, x8"), + Q!(" adcs " "x3, x3, x11"), + Q!(" umulh " "x11, x6, x9"), + Q!(" adcs " "x4, x4, x11"), + Q!(" adc " "x5, x5, xzr"), + Q!(" mov " "x7, #38"), + Q!(" mul " "x11, x7, x16"), + Q!(" umulh " "x9, x7, x16"), + Q!(" adds " "x12, x12, x11"), + Q!(" mul " "x11, x7, x3"), + Q!(" umulh " "x3, x7, x3"), + Q!(" adcs " "x13, x13, x11"), + Q!(" mul " "x11, x7, x4"), + Q!(" umulh " "x4, x7, x4"), + Q!(" adcs " "x14, x14, x11"), + Q!(" mul " "x11, x7, x5"), + Q!(" umulh " "x5, x7, x5"), + Q!(" adcs " "x15, x15, x11"), + Q!(" cset " "x16, hs"), + Q!(" adds " "x15, x15, x4"), + Q!(" adc " "x16, x16, x5"), + Q!(" cmn " "x15, x15"), + Q!(" orr " "x15, x15, #0x8000000000000000"), +
Q!(" adc " "x8, x16, x16"), + Q!(" mov " "x7, #19"), + Q!(" madd " "x11, x7, x8, x7"), + Q!(" adds " "x12, x12, x11"), + Q!(" adcs " "x13, x13, x9"), + Q!(" adcs " "x14, x14, x3"), + Q!(" adcs " "x15, x15, xzr"), + Q!(" csel " "x7, x7, xzr, lo"), + Q!(" subs " "x12, x12, x7"), + Q!(" sbcs " "x13, x13, xzr"), + Q!(" sbcs " "x14, x14, xzr"), + Q!(" sbc " "x15, x15, xzr"), + Q!(" and " "x15, x15, #0x7fffffffffffffff"), + Q!(" stp " "x12, x13, [x0]"), + Q!(" stp " "x14, x15, [x0, #16]"), + Q!(" ret " ), + + // ************************************************************* + // Local z = x^(2^n), i.e. n successive squarings + // ************************************************************* + + Q!(Label!("edwards25519_decode_alt_nsqr_p25519", 4) ":"), + + // Copy input argument into [x5;x4;x3;x2] (overwriting input pointer x2) + + Q!(" ldp " "x6, x3, [x2]"), + Q!(" ldp " "x4, x5, [x2, #16]"), + Q!(" mov " "x2, x6"), + + // Main squaring loop, accumulating in [x5;x4;x3;x2] consistently and + // only ensuring the intermediates are < 2 * p_25519 = 2^256 - 38 + + Q!(Label!("edwards25519_decode_alt_loop", 5) ":"), + Q!(" mul " "x9, x2, x3"), + Q!(" umulh " "x10, x2, x3"), + Q!(" mul " "x11, x2, x5"), + Q!(" umulh " "x12, x2, x5"), + Q!(" mul " "x7, x2, x4"), + Q!(" umulh " "x6, x2, x4"), + Q!(" adds " "x10, x10, x7"), + Q!(" adcs " "x11, x11, x6"), + Q!(" mul " "x7, x3, x4"), + Q!(" umulh " "x6, x3, x4"), + Q!(" adc " "x6, x6, xzr"), + Q!(" adds " "x11, x11, x7"), + Q!(" mul " "x13, x4, x5"), + Q!(" umulh " "x14, x4, x5"), + Q!(" adcs " "x12, x12, x6"), + Q!(" mul " "x7, x3, x5"), + Q!(" umulh " "x6, x3, x5"), + Q!(" adc " "x6, x6, xzr"), + Q!(" adds " "x12, x12, x7"), + Q!(" adcs " "x13, x13, x6"), + Q!(" adc " "x14, x14, xzr"), + Q!(" adds " "x9, x9, x9"), + Q!(" adcs " "x10, x10, x10"), + Q!(" adcs " "x11, x11, x11"), + Q!(" adcs " "x12, x12, x12"), + Q!(" adcs " "x13, x13, x13"), + Q!(" adcs " "x14, x14, x14"), + Q!(" cset " "x6, hs"), + Q!(" umulh " "x7, x2, x2"), + Q!(" mul " "x8, 
x2, x2"), + Q!("
adds " "x9, x9, x7"), + Q!(" mul " "x7, x3, x3"), + Q!(" adcs " "x10, x10, x7"), + Q!(" umulh " "x7, x3, x3"), + Q!(" adcs " "x11, x11, x7"), + Q!(" mul " "x7, x4, x4"), + Q!(" adcs " "x12, x12, x7"), + Q!(" umulh " "x7, x4, x4"), + Q!(" adcs " "x13, x13, x7"), + Q!(" mul " "x7, x5, x5"), + Q!(" adcs " "x14, x14, x7"), + Q!(" umulh " "x7, x5, x5"), + Q!(" adc " "x6, x6, x7"), + Q!(" mov " "x3, #38"), + Q!(" mul " "x7, x3, x12"), + Q!(" umulh " "x4, x3, x12"), + Q!(" adds " "x8, x8, x7"), + Q!(" mul " "x7, x3, x13"), + Q!(" umulh " "x13, x3, x13"), + Q!(" adcs " "x9, x9, x7"), + Q!(" mul " "x7, x3, x14"), + Q!(" umulh " "x14, x3, x14"), + Q!(" adcs " "x10, x10, x7"), + Q!(" mul " "x7, x3, x6"), + Q!(" umulh " "x6, x3, x6"), + Q!(" adcs " "x11, x11, x7"), + Q!(" cset " "x12, hs"), + Q!(" adds " "x11, x11, x14"), + Q!(" adc " "x12, x12, x6"), + Q!(" cmn " "x11, x11"), + Q!(" bic " "x11, x11, #0x8000000000000000"), + Q!(" adc " "x2, x12, x12"), + Q!(" mov " "x3, #0x13"), + Q!(" mul " "x7, x3, x2"), + Q!(" adds " "x2, x8, x7"), + Q!(" adcs " "x3, x9, x4"), + Q!(" adcs " "x4, x10, x13"), + Q!(" adc " "x5, x11, xzr"), + + // Loop as applicable + + Q!(" subs " "x1, x1, #1"), + Q!(" bne " Label!("edwards25519_decode_alt_loop", 5, Before)), + + // We know the intermediate result x < 2^256 - 38, and now we do strict + // modular reduction mod 2^255 - 19. Note x < 2^255 - 19 <=> x + 19 < 2^255 + // which is equivalent to a "pl" condition.
+ + Q!(" adds " "x6, x2, #19"), + Q!(" adcs " "x7, x3, xzr"), + Q!(" adcs " "x8, x4, xzr"), + Q!(" adcs " "x9, x5, xzr"), + + Q!(" csel " "x2, x2, x6, pl"), + Q!(" csel " "x3, x3, x7, pl"), + Q!(" csel " "x4, x4, x8, pl"), + Q!(" csel " "x5, x5, x9, pl"), + Q!(" bic " "x5, x5, #0x8000000000000000"), + + // Copy result back into destination and return + + Q!(" stp " "x2, x3, [x0]"), + Q!(" stp " "x4, x5, [x0, #16]"), + Q!(" ret " ), + Q!(Label!("hoist_finish", 2) ":"), + inout("x0") z.as_mut_ptr() => ret, + inout("x1") c.as_ptr() => _, + // clobbers + out("x10") _, + out("x11") _, + out("x12") _, + out("x13") _, + out("x14") _, + out("x15") _, + out("x16") _, + out("x17") _, + out("x2") _, + out("x20") _, + out("x21") _, + out("x3") _, + out("x30") _, + out("x4") _, + out("x5") _, + out("x6") _, + out("x7") _, + out("x8") _, + out("x9") _, + ) + }; + ret == 0 +} diff --git a/graviola/src/low/aarch64/edwards25519_scalarmulbase.rs b/graviola/src/low/aarch64/edwards25519_scalarmulbase.rs new file mode 100644 index 000000000..9bd18082e --- /dev/null +++ b/graviola/src/low/aarch64/edwards25519_scalarmulbase.rs @@ -0,0 +1,8547 @@ +// generated source. do not edit. +#![allow(non_upper_case_globals, unused_macros, unused_imports)] +use crate::low::macros::*; + +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Scalar multiplication for the edwards25519 standard basepoint +// Input scalar[4]; output res[8] +// +// extern void edwards25519_scalarmulbase_alt +// (uint64_t res[static 8],const uint64_t scalar[static 4]); +// +// Given a scalar n, returns point (X,Y) = n * B where B = (...,4/5) is +// the standard basepoint for the edwards25519 (Ed25519) curve.
+// +// Standard ARM ABI: X0 = res, X1 = scalar +// ---------------------------------------------------------------------------- + +// Size of individual field elements + +macro_rules! NUMSIZE { + () => { + "32" + }; +} + +// Stable home for the input result argument during the whole body + +macro_rules! res { + () => { + "x23" + }; +} + +// Other variables that are only needed prior to the modular inverse. + +macro_rules! tab { + () => { + "x19" + }; +} + +macro_rules! i { + () => { + "x20" + }; +} + +macro_rules! bias { + () => { + "x21" + }; +} + +macro_rules! bf { + () => { + "x22" + }; +} +macro_rules! ix { + () => { + "x22" + }; +} + +// Pointer-offset pairs for result and temporaries on stack with some aliasing. + +macro_rules! resx { () => { Q!(res!() ", # (0 * " NUMSIZE!() ")") } } +macro_rules! resy { () => { Q!(res!() ", # (1 * " NUMSIZE!() ")") } } + +macro_rules! scalar { () => { Q!("sp, # (0 * " NUMSIZE!() ")") } } + +macro_rules! tabent { () => { Q!("sp, # (1 * " NUMSIZE!() ")") } } +macro_rules! ymx_2 { () => { Q!("sp, # (1 * " NUMSIZE!() ")") } } +macro_rules! xpy_2 { () => { Q!("sp, # (2 * " NUMSIZE!() ")") } } +macro_rules! kxy_2 { () => { Q!("sp, # (3 * " NUMSIZE!() ")") } } + +macro_rules! acc { () => { Q!("sp, # (4 * " NUMSIZE!() ")") } } +macro_rules! x_1 { () => { Q!("sp, # (4 * " NUMSIZE!() ")") } } +macro_rules! y_1 { () => { Q!("sp, # (5 * " NUMSIZE!() ")") } } +macro_rules! z_1 { () => { Q!("sp, # (6 * " NUMSIZE!() ")") } } +macro_rules! w_1 { () => { Q!("sp, # (7 * " NUMSIZE!() ")") } } +macro_rules! x_3 { () => { Q!("sp, # (4 * " NUMSIZE!() ")") } } +macro_rules! y_3 { () => { Q!("sp, # (5 * " NUMSIZE!() ")") } } +macro_rules! z_3 { () => { Q!("sp, # (6 * " NUMSIZE!() ")") } } +macro_rules! w_3 { () => { Q!("sp, # (7 * " NUMSIZE!() ")") } } + +macro_rules! tmpspace { () => { Q!("sp, # (8 * " NUMSIZE!() ")") } } +macro_rules! t0 { () => { Q!("sp, # (8 * " NUMSIZE!() ")") } } +macro_rules! 
t1 { () => { Q!("sp, # (9 * " NUMSIZE!() ")") } } +macro_rules! t2 { () => { Q!("sp, # (10 * " NUMSIZE!() ")") } } +macro_rules! t3 { () => { Q!("sp, # (11 * " NUMSIZE!() ")") } } +macro_rules! t4 { () => { Q!("sp, # (12 * " NUMSIZE!() ")") } } +macro_rules! t5 { () => { Q!("sp, # (13 * " NUMSIZE!() ")") } } + +// Total size to reserve on the stack + +macro_rules! NSPACE { () => { Q!("(14 * " NUMSIZE!() ")") } } + +// Load 64-bit immediate into a register + +macro_rules! movbig { + ($nn:expr, $n3:expr, $n2:expr, $n1:expr, $n0:expr) => { Q!( + "movz " $nn ", " $n0 ";\n" + "movk " $nn ", " $n1 ", lsl #16;\n" + "movk " $nn ", " $n2 ", lsl #32;\n" + "movk " $nn ", " $n3 ", lsl #48" + )} +} + +// Macro wrapping up the basic field operation bignum_mul_p25519_alt, only +// trivially different from a pure function call to that subroutine. + +macro_rules! mul_p25519 { + ($P0:expr, $P1:expr, $P2:expr) => { Q!( + "ldp x3, x4, [" $P1 "];\n" + "ldp x7, x8, [" $P2 "];\n" + "mul x12, x3, x7;\n" + "umulh x13, x3, x7;\n" + "mul x11, x3, x8;\n" + "umulh x14, x3, x8;\n" + "adds x13, x13, x11;\n" + "ldp x9, x10, [" $P2 "+ 16];\n" + "mul x11, x3, x9;\n" + "umulh x15, x3, x9;\n" + "adcs x14, x14, x11;\n" + "mul x11, x3, x10;\n" + "umulh x16, x3, x10;\n" + "adcs x15, x15, x11;\n" + "adc x16, x16, xzr;\n" + "ldp x5, x6, [" $P1 "+ 16];\n" + "mul x11, x4, x7;\n" + "adds x13, x13, x11;\n" + "mul x11, x4, x8;\n" + "adcs x14, x14, x11;\n" + "mul x11, x4, x9;\n" + "adcs x15, x15, x11;\n" + "mul x11, x4, x10;\n" + "adcs x16, x16, x11;\n" + "umulh x3, x4, x10;\n" + "adc x3, x3, xzr;\n" + "umulh x11, x4, x7;\n" + "adds x14, x14, x11;\n" + "umulh x11, x4, x8;\n" + "adcs x15, x15, x11;\n" + "umulh x11, x4, x9;\n" + "adcs x16, x16, x11;\n" + "adc x3, x3, xzr;\n" + "mul x11, x5, x7;\n" + "adds x14, x14, x11;\n" + "mul x11, x5, x8;\n" + "adcs x15, x15, x11;\n" + "mul x11, x5, x9;\n" + "adcs x16, x16, x11;\n" + "mul x11, x5, x10;\n" + "adcs x3, x3, x11;\n" + "umulh x4, x5, x10;\n" + "adc x4, x4, xzr;\n" 
+ "umulh x11, x5, x7;\n" + "adds x15, x15, x11;\n" + "umulh x11, x5, x8;\n" + "adcs x16, x16, x11;\n" + "umulh x11, x5, x9;\n" + "adcs x3, x3, x11;\n" + "adc x4, x4, xzr;\n" + "mul x11, x6, x7;\n" + "adds x15, x15, x11;\n" + "mul x11, x6, x8;\n" + "adcs x16, x16, x11;\n" + "mul x11, x6, x9;\n" + "adcs x3, x3, x11;\n" + "mul x11, x6, x10;\n" + "adcs x4, x4, x11;\n" + "umulh x5, x6, x10;\n" + "adc x5, x5, xzr;\n" + "umulh x11, x6, x7;\n" + "adds x16, x16, x11;\n" + "umulh x11, x6, x8;\n" + "adcs x3, x3, x11;\n" + "umulh x11, x6, x9;\n" + "adcs x4, x4, x11;\n" + "adc x5, x5, xzr;\n" + "mov x7, #0x26;\n" + "mul x11, x7, x16;\n" + "umulh x9, x7, x16;\n" + "adds x12, x12, x11;\n" + "mul x11, x7, x3;\n" + "umulh x3, x7, x3;\n" + "adcs x13, x13, x11;\n" + "mul x11, x7, x4;\n" + "umulh x4, x7, x4;\n" + "adcs x14, x14, x11;\n" + "mul x11, x7, x5;\n" + "umulh x5, x7, x5;\n" + "adcs x15, x15, x11;\n" + "cset x16, cs;\n" + "adds x15, x15, x4;\n" + "adc x16, x16, x5;\n" + "cmn x15, x15;\n" + "orr x15, x15, #0x8000000000000000;\n" + "adc x8, x16, x16;\n" + "mov x7, #0x13;\n" + "madd x11, x7, x8, x7;\n" + "adds x12, x12, x11;\n" + "adcs x13, x13, x9;\n" + "adcs x14, x14, x3;\n" + "adcs x15, x15, xzr;\n" + "csel x7, x7, xzr, cc;\n" + "subs x12, x12, x7;\n" + "sbcs x13, x13, xzr;\n" + "sbcs x14, x14, xzr;\n" + "sbc x15, x15, xzr;\n" + "and x15, x15, #0x7fffffffffffffff;\n" + "stp x12, x13, [" $P0 "];\n" + "stp x14, x15, [" $P0 "+ 16]" + )} +} + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. + +macro_rules! 
mul_4 { + ($P0:expr, $P1:expr, $P2:expr) => { Q!( + "ldp x3, x4, [" $P1 "];\n" + "ldp x7, x8, [" $P2 "];\n" + "mul x12, x3, x7;\n" + "umulh x13, x3, x7;\n" + "mul x11, x3, x8;\n" + "umulh x14, x3, x8;\n" + "adds x13, x13, x11;\n" + "ldp x9, x10, [" $P2 "+ 16];\n" + "mul x11, x3, x9;\n" + "umulh x15, x3, x9;\n" + "adcs x14, x14, x11;\n" + "mul x11, x3, x10;\n" + "umulh x16, x3, x10;\n" + "adcs x15, x15, x11;\n" + "adc x16, x16, xzr;\n" + "ldp x5, x6, [" $P1 "+ 16];\n" + "mul x11, x4, x7;\n" + "adds x13, x13, x11;\n" + "mul x11, x4, x8;\n" + "adcs x14, x14, x11;\n" + "mul x11, x4, x9;\n" + "adcs x15, x15, x11;\n" + "mul x11, x4, x10;\n" + "adcs x16, x16, x11;\n" + "umulh x3, x4, x10;\n" + "adc x3, x3, xzr;\n" + "umulh x11, x4, x7;\n" + "adds x14, x14, x11;\n" + "umulh x11, x4, x8;\n" + "adcs x15, x15, x11;\n" + "umulh x11, x4, x9;\n" + "adcs x16, x16, x11;\n" + "adc x3, x3, xzr;\n" + "mul x11, x5, x7;\n" + "adds x14, x14, x11;\n" + "mul x11, x5, x8;\n" + "adcs x15, x15, x11;\n" + "mul x11, x5, x9;\n" + "adcs x16, x16, x11;\n" + "mul x11, x5, x10;\n" + "adcs x3, x3, x11;\n" + "umulh x4, x5, x10;\n" + "adc x4, x4, xzr;\n" + "umulh x11, x5, x7;\n" + "adds x15, x15, x11;\n" + "umulh x11, x5, x8;\n" + "adcs x16, x16, x11;\n" + "umulh x11, x5, x9;\n" + "adcs x3, x3, x11;\n" + "adc x4, x4, xzr;\n" + "mul x11, x6, x7;\n" + "adds x15, x15, x11;\n" + "mul x11, x6, x8;\n" + "adcs x16, x16, x11;\n" + "mul x11, x6, x9;\n" + "adcs x3, x3, x11;\n" + "mul x11, x6, x10;\n" + "adcs x4, x4, x11;\n" + "umulh x5, x6, x10;\n" + "adc x5, x5, xzr;\n" + "umulh x11, x6, x7;\n" + "adds x16, x16, x11;\n" + "umulh x11, x6, x8;\n" + "adcs x3, x3, x11;\n" + "umulh x11, x6, x9;\n" + "adcs x4, x4, x11;\n" + "adc x5, x5, xzr;\n" + "mov x7, #0x26;\n" + "mul x11, x7, x16;\n" + "umulh x9, x7, x16;\n" + "adds x12, x12, x11;\n" + "mul x11, x7, x3;\n" + "umulh x3, x7, x3;\n" + "adcs x13, x13, x11;\n" + "mul x11, x7, x4;\n" + "umulh x4, x7, x4;\n" + "adcs x14, x14, x11;\n" + "mul x11, x7, x5;\n" + "umulh 
x5, x7, x5;\n" + "adcs x15, x15, x11;\n" + "cset x16, cs;\n" + "adds x15, x15, x4;\n" + "adc x16, x16, x5;\n" + "cmn x15, x15;\n" + "bic x15, x15, #0x8000000000000000;\n" + "adc x8, x16, x16;\n" + "mov x7, #0x13;\n" + "mul x11, x7, x8;\n" + "adds x12, x12, x11;\n" + "adcs x13, x13, x9;\n" + "adcs x14, x14, x3;\n" + "adc x15, x15, xzr;\n" + "stp x12, x13, [" $P0 "];\n" + "stp x14, x15, [" $P0 "+ 16]" + )} +} + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +macro_rules! sub_twice4 { + ($P0:expr, $P1:expr, $P2:expr) => { Q!( + "ldp x5, x6, [" $P1 "];\n" + "ldp x4, x3, [" $P2 "];\n" + "subs x5, x5, x4;\n" + "sbcs x6, x6, x3;\n" + "ldp x7, x8, [" $P1 "+ 16];\n" + "ldp x4, x3, [" $P2 "+ 16];\n" + "sbcs x7, x7, x4;\n" + "sbcs x8, x8, x3;\n" + "mov x4, #38;\n" + "csel x3, x4, xzr, lo;\n" + "subs x5, x5, x3;\n" + "sbcs x6, x6, xzr;\n" + "sbcs x7, x7, xzr;\n" + "sbc x8, x8, xzr;\n" + "stp x5, x6, [" $P0 "];\n" + "stp x7, x8, [" $P0 "+ 16]" + )} +} + +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. + +macro_rules! add_twice4 { + ($P0:expr, $P1:expr, $P2:expr) => { Q!( + "ldp x3, x4, [" $P1 "];\n" + "ldp x7, x8, [" $P2 "];\n" + "adds x3, x3, x7;\n" + "adcs x4, x4, x8;\n" + "ldp x5, x6, [" $P1 "+ 16];\n" + "ldp x7, x8, [" $P2 "+ 16];\n" + "adcs x5, x5, x7;\n" + "adcs x6, x6, x8;\n" + "mov x9, #38;\n" + "csel x9, x9, xzr, cs;\n" + "adds x3, x3, x9;\n" + "adcs x4, x4, xzr;\n" + "adcs x5, x5, xzr;\n" + "adc x6, x6, xzr;\n" + "stp x3, x4, [" $P0 "];\n" + "stp x5, x6, [" $P0 "+ 16]" + )} +} + +macro_rules! 
double_twice4 { + ($P0:expr, $P1:expr) => { Q!( + "ldp x3, x4, [" $P1 "];\n" + "adds x3, x3, x3;\n" + "adcs x4, x4, x4;\n" + "ldp x5, x6, [" $P1 "+ 16];\n" + "adcs x5, x5, x5;\n" + "adcs x6, x6, x6;\n" + "mov x9, #38;\n" + "csel x9, x9, xzr, cs;\n" + "adds x3, x3, x9;\n" + "adcs x4, x4, xzr;\n" + "adcs x5, x5, xzr;\n" + "adc x6, x6, xzr;\n" + "stp x3, x4, [" $P0 "];\n" + "stp x5, x6, [" $P0 "+ 16]" + )} +} + +/// Scalar multiplication for the edwards25519 standard basepoint +/// +/// Input scalar[4]; output res[8] +/// +/// Given a scalar n, returns point (X,Y) = n * B where B = (...,4/5) is +/// the standard basepoint for the edwards25519 (Ed25519) curve. +pub(crate) fn edwards25519_scalarmulbase(res: &mut [u64; 8], scalar: &[u64; 4]) { + // SAFETY: inline assembly. see [crate::low::inline_assembly_safety] for safety info. + unsafe { + core::arch::asm!( + + + // Save regs and make room for temporaries + + Q!(" stp " "x19, x20, [sp, -16] !"), + Q!(" stp " "x21, x22, [sp, -16] !"), + Q!(" stp " "x23, x24, [sp, -16] !"), + Q!(" sub " "sp, sp, # " NSPACE!()), + + // Move the output pointer to a stable place + + Q!(" mov " res!() ", x0"), + + // Copy the input scalar x to its local variable while reducing it + // modulo 2^252 + m where m = 27742317777372353535851937790883648493; + // this is the order of the basepoint so this doesn't change the result. 
+ // First do q = floor(x/2^252) and x' = x - q * (2^252 + m), which gives + // an initial result -15 * m <= x' < 2^252 + + Q!(" ldp " "x10, x11, [x1]"), + Q!(" ldp " "x12, x13, [x1, #16]"), + + Q!(" lsr " "x9, x13, #60"), + + movbig!("x0", "#0x5812", "#0x631a", "#0x5cf5", "#0xd3ed"), + movbig!("x1", "#0x14de", "#0xf9de", "#0xa2f7", "#0x9cd6"), + + Q!(" mul " "x2, x9, x0"), + Q!(" mul " "x3, x9, x1"), + Q!(" umulh " "x4, x9, x0"), + Q!(" umulh " "x5, x9, x1"), + + Q!(" adds " "x3, x3, x4"), + Q!(" adc " "x4, x5, xzr"), + Q!(" lsl " "x5, x9, #60"), + + Q!(" subs " "x10, x10, x2"), + Q!(" sbcs " "x11, x11, x3"), + Q!(" sbcs " "x12, x12, x4"), + Q!(" sbcs " "x13, x13, x5"), + + // If x' < 0 then just directly negate it; this makes sure the + // reduced argument is strictly 0 <= x' < 2^252, but now we need + // to record (done via bit 255 of the reduced scalar, which is + // ignored in the main loop) when we negated so we can flip + // the sign of the eventual point to compensate. + + Q!(" csetm " "x9, cc"), + Q!(" adds " "xzr, x9, x9"), + Q!(" eor " "x10, x10, x9"), + Q!(" adcs " "x10, x10, xzr"), + Q!(" eor " "x11, x11, x9"), + Q!(" adcs " "x11, x11, xzr"), + Q!(" eor " "x12, x12, x9"), + Q!(" adcs " "x12, x12, xzr"), + Q!(" eor " "x13, x13, x9"), + Q!(" adc " "x13, x13, xzr"), + + Q!(" and " "x9, x9, #0x8000000000000000"), + Q!(" orr " "x13, x13, x9"), + + // And before we store the scalar, test and reset bit 251 to + // initialize the main loop just below. + + Q!(" stp " "x10, x11, [" scalar!() "]"), + Q!(" tst " "x13, #0x0800000000000000"), + Q!(" bic " "x13, x13, #0x0800000000000000"), + Q!(" stp " "x12, x13, [" scalar!() "+ 16]"), + + // The main part of the computation is in extended-projective coordinates + // (X,Y,Z,T), representing an affine point on the edwards25519 curve + // (x,y) via x = X/Z, y = Y/Z and x * y = T/Z (so X * Y = T * Z). + // In comments B means the standard basepoint (x,4/5) = + // (0x216....f25d51a,0x6666..666658). 
+ // + // Initialize accumulator "acc" to either 0 or 2^251 * B depending on + // bit 251 of the (reduced) scalar. That leaves bits 0..250 to handle. + + Q!(" adrp " "x10, " PageRef!("edwards25519_scalarmulbase_alt_edwards25519_0g")), + Q!(" adrp " "x11, " PageRef!("edwards25519_scalarmulbase_alt_edwards25519_251g")), + Q!(" ldp " "x0, x1, [x10]"), + Q!(" ldp " "x2, x3, [x11]"), + Q!(" csel " "x0, x0, x2, eq"), + Q!(" csel " "x1, x1, x3, eq"), + Q!(" stp " "x0, x1, [" acc!() "]"), + + Q!(" ldp " "x0, x1, [x10, 1 * 16]"), + Q!(" ldp " "x2, x3, [x11, 1 * 16]"), + Q!(" csel " "x0, x0, x2, eq"), + Q!(" csel " "x1, x1, x3, eq"), + Q!(" stp " "x0, x1, [" acc!() "+ 1 * 16]"), + + Q!(" ldp " "x0, x1, [x10, 2 * 16]"), + Q!(" ldp " "x2, x3, [x11, 2 * 16]"), + Q!(" csel " "x0, x0, x2, eq"), + Q!(" csel " "x1, x1, x3, eq"), + Q!(" stp " "x0, x1, [" acc!() "+ 2 * 16]"), + + Q!(" ldp " "x0, x1, [x10, 3 * 16]"), + Q!(" ldp " "x2, x3, [x11, 3 * 16]"), + Q!(" csel " "x0, x0, x2, eq"), + Q!(" csel " "x1, x1, x3, eq"), + Q!(" stp " "x0, x1, [" acc!() "+ 3 * 16]"), + + Q!(" mov " "x0, #1"), + Q!(" stp " "x0, xzr, [" acc!() "+ 4 * 16]"), + Q!(" stp " "xzr, xzr, [" acc!() "+ 5 * 16]"), + + Q!(" ldp " "x0, x1, [x10, 4 * 16]"), + Q!(" ldp " "x2, x3, [x11, 4 * 16]"), + Q!(" csel " "x0, x0, x2, eq"), + Q!(" csel " "x1, x1, x3, eq"), + Q!(" stp " "x0, x1, [" acc!() "+ 6 * 16]"), + + Q!(" ldp " "x0, x1, [x10, 5 * 16]"), + Q!(" ldp " "x2, x3, [x11, 5 * 16]"), + Q!(" csel " "x0, x0, x2, eq"), + Q!(" csel " "x1, x1, x3, eq"), + Q!(" stp " "x0, x1, [" acc!() "+ 7 * 16]"), + + // The counter "i" tracks the bit position for which the scalar has + // already been absorbed, starting at 0 and going up in chunks of 4. + // + // The pointer "tab" points at the current block of the table for + // multiples (2^i * j) * B at the current bit position i; 1 <= j <= 8. + // + // The bias is always either 0 and 1 and needs to be added to the + // partially processed scalar implicitly. 
This is used to absorb 4 bits + // of scalar per iteration from 3-bit table indexing by exploiting + // negation: (16 * h + l) * B = (16 * (h + 1) - (16 - l)) * B is used + // when l >= 9. Note that we can't have any bias left over at the + // end because we made sure bit 251 is clear in the reduced scalar. + + Q!(" mov " i!() ", 0"), + Q!(" adrp " tab!() ", " PageRef!("edwards25519_scalarmulbase_alt_edwards25519_gtable")), + Q!(" mov " bias!() ", xzr"), + + // Start of the main loop, repeated 63 times for i = 0, 4, 8, ..., 248 + + Q!(Label!("edwards25519_scalarmulbase_alt_scalarloop", 2) ":"), + + // Look at the next 4-bit field "bf", adding the previous bias as well. + // Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9, + // setting the bias to 1 for the next iteration in the latter case. + + Q!(" lsr " "x0, " i!() ", #6"), + Q!(" ldr " "x2, [sp, x0, lsl #3]"), + Q!(" lsr " "x2, x2, " i!()), + Q!(" and " "x2, x2, #15"), + Q!(" add " bf!() ", x2, " bias!()), + + Q!(" cmp " bf!() ", 9"), + Q!(" cset " bias!() ", cs"), + + Q!(" mov " "x0, 16"), + Q!(" sub " "x0, x0, " bf!()), + Q!(" cmp " bias!() ", xzr"), + Q!(" csel " ix!() ", x0, " bf!() ", ne"), + + // Perform constant-time lookup in the table to get element number "ix". + // The table entry for the affine point (x,y) is actually a triple + // (y - x,x + y,2 * d * x * y) to precompute parts of the addition. + // Note that "ix" can be 0, so we set up the appropriate identity first. 
+ + Q!(" mov " "x0, #1"), + Q!(" mov " "x1, xzr"), + Q!(" mov " "x2, xzr"), + Q!(" mov " "x3, xzr"), + Q!(" mov " "x4, #1"), + Q!(" mov " "x5, xzr"), + Q!(" mov " "x6, xzr"), + Q!(" mov " "x7, xzr"), + Q!(" mov " "x8, xzr"), + Q!(" mov " "x9, xzr"), + Q!(" mov " "x10, xzr"), + Q!(" mov " "x11, xzr"), + + Q!(" cmp " ix!() ", #1"), + Q!(" ldp " "x12, x13, [" tab!() "]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " tab!() ", " tab!() ", #96"), + + Q!(" cmp " ix!() ", #2"), + Q!(" ldp " "x12, x13, [" tab!() "]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " tab!() ", " tab!() ", #96"), + + Q!(" cmp " ix!() ", #3"), + Q!(" ldp " "x12, x13, [" tab!() "]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #16]"), + Q!(" csel " 
"x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " tab!() ", " tab!() ", #96"), + + Q!(" cmp " ix!() ", #4"), + Q!(" ldp " "x12, x13, [" tab!() "]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " tab!() ", " tab!() ", #96"), + + Q!(" cmp " ix!() ", #5"), + Q!(" ldp " "x12, x13, [" tab!() "]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, 
x13, ne"), + Q!(" add " tab!() ", " tab!() ", #96"), + + Q!(" cmp " ix!() ", #6"), + Q!(" ldp " "x12, x13, [" tab!() "]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " tab!() ", " tab!() ", #96"), + + Q!(" cmp " ix!() ", #7"), + Q!(" ldp " "x12, x13, [" tab!() "]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " tab!() ", " tab!() ", #96"), + + Q!(" cmp " ix!() ", #8"), + Q!(" ldp " "x12, x13, [" tab!() "]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #48]"), + Q!(" csel " "x6, x6, x12, ne"), + 
Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [" tab!() ", #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " tab!() ", " tab!() ", #96"), + + // We now have the triple from the table in registers as follows + // + // [x3;x2;x1;x0] = y - x + // [x7;x6;x5;x4] = x + y + // [x11;x10;x9;x8] = 2 * d * x * y + // + // In case bias = 1 we need to negate this. For Edwards curves + // -(x,y) = (-x,y), i.e. we need to negate the x coordinate. + // In this processed encoding, that amounts to swapping the + // first two fields and negating the third. + // + // The optional negation here also pretends bias = 0 whenever + // ix = 0 so that it doesn't need to handle the case of zero + // inputs, since no non-trivial table entries are zero. Note + // that in the zero case the whole negation is trivial, and + // so indeed is the swapping. + + Q!(" cmp " bias!() ", #0"), + + Q!(" csel " "x12, x0, x4, eq"), + Q!(" csel " "x13, x1, x5, eq"), + Q!(" csel " "x14, x2, x6, eq"), + Q!(" csel " "x15, x3, x7, eq"), + Q!(" stp " "x12, x13, [" tabent!() "]"), + Q!(" stp " "x14, x15, [" tabent!() "+ 16]"), + + Q!(" csel " "x12, x0, x4, ne"), + Q!(" csel " "x13, x1, x5, ne"), + Q!(" csel " "x14, x2, x6, ne"), + Q!(" csel " "x15, x3, x7, ne"), + Q!(" stp " "x12, x13, [" tabent!() "+ 32]"), + Q!(" stp " "x14, x15, [" tabent!() "+ 48]"), + + Q!(" mov " "x0, #-19"), + Q!(" subs " "x0, x0, x8"), + Q!(" mov " "x2, #-1"), + Q!(" sbcs " "x1, x2, x9"), + Q!(" sbcs " "x2, x2, x10"), + Q!(" mov " "x3, #0x7FFFFFFFFFFFFFFF"), + Q!(" sbc " "x3, x3, x11"), + + Q!(" cmp " ix!() ", xzr"), + Q!(" ccmp " bias!() ", xzr, #4, ne"), + + Q!(" csel " "x0, x0, x8, ne"), + Q!(" csel " "x1, x1, x9, ne"), + Q!(" stp " "x0, x1, [" tabent!() "+ 64]"), + Q!(" csel " "x2, x2, x10, ne"), + Q!(" csel " "x3, x3, x11, ne"), + Q!(" stp " "x2, x3, [" tabent!() "+ 80]"), 
+ + // Extended-projective and precomputed mixed addition. + // This is effectively the same as calling the standalone + // function edwards25519_pepadd_alt(acc,acc,tabent), but we + // only retain slightly weaker normalization < 2 * p_25519 + // throughout the inner loop, so the computation is + // slightly different, and faster overall. + + double_twice4!(t0!(), z_1!()), + sub_twice4!(t1!(), y_1!(), x_1!()), + add_twice4!(t2!(), y_1!(), x_1!()), + mul_4!(t3!(), w_1!(), kxy_2!()), + mul_4!(t1!(), t1!(), ymx_2!()), + mul_4!(t2!(), t2!(), xpy_2!()), + sub_twice4!(t4!(), t0!(), t3!()), + add_twice4!(t0!(), t0!(), t3!()), + sub_twice4!(t5!(), t2!(), t1!()), + add_twice4!(t1!(), t2!(), t1!()), + mul_4!(z_3!(), t4!(), t0!()), + mul_4!(x_3!(), t5!(), t4!()), + mul_4!(y_3!(), t0!(), t1!()), + mul_4!(w_3!(), t5!(), t1!()), + + // End of the main loop; move on by 4 bits. + + Q!(" add " i!() ", " i!() ", 4"), + Q!(" cmp " i!() ", 252"), + Q!(" bcc " Label!("edwards25519_scalarmulbase_alt_scalarloop", 2, Before)), + + // Insert the optional negation of the projective X coordinate, and + // so by extension the final affine x coordinate x = X/Z and thus + // the point P = (x,y). We only know X < 2 * p_25519, so we do the + // negation as 2 * p_25519 - X to keep it nonnegative. From this + // point on we don't need any normalization of the coordinates + // except for making sure that they fit in 4 digits. 
+ + Q!(" ldp " "x0, x1, [" x_3!() "]"), + Q!(" ldp " "x2, x3, [" x_3!() "+ 16]"), + Q!(" mov " "x4, #0xffffffffffffffda"), + Q!(" subs " "x4, x4, x0"), + Q!(" mov " "x7, #0xffffffffffffffff"), + Q!(" sbcs " "x5, x7, x1"), + Q!(" sbcs " "x6, x7, x2"), + Q!(" sbc " "x7, x7, x3"), + Q!(" ldr " "x10, [" scalar!() "+ 24]"), + Q!(" tst " "x10, #0x8000000000000000"), + Q!(" csel " "x0, x4, x0, ne"), + Q!(" csel " "x1, x5, x1, ne"), + Q!(" csel " "x2, x6, x2, ne"), + Q!(" csel " "x3, x7, x3, ne"), + Q!(" stp " "x0, x1, [" x_3!() "]"), + Q!(" stp " "x2, x3, [" x_3!() "+ 16]"), + + // Now we need to map out of the extended-projective representation + // (X,Y,Z,W) back to the affine form (x,y) = (X/Z,Y/Z). This means + // first calling the modular inverse to get w_3 = 1/z_3. + + Q!(" add " "x0, " w_3!()), + Q!(" add " "x1, " z_3!()), + + // Inline copy of bignum_inv_p25519, identical except for stripping out + // the prologue and epilogue saving and restoring registers and making + // and reclaiming room on the stack. For more details and explanations see + // "arm/curve25519/bignum_inv_p25519.S". Note that the stack it uses for + // its own temporaries is 128 bytes, so it has no effect on variables + // that are needed in the rest of our computation here: res, w_3, x_3 + // and y_3. 
+ + Q!(" mov " "x20, x0"), + Q!(" mov " "x10, #0xffffffffffffffed"), + Q!(" mov " "x11, #0xffffffffffffffff"), + Q!(" stp " "x10, x11, [sp]"), + Q!(" mov " "x12, #0x7fffffffffffffff"), + Q!(" stp " "x11, x12, [sp, #16]"), + Q!(" ldp " "x2, x3, [x1]"), + Q!(" ldp " "x4, x5, [x1, #16]"), + Q!(" mov " "x7, #0x13"), + Q!(" lsr " "x6, x5, #63"), + Q!(" madd " "x6, x7, x6, x7"), + Q!(" adds " "x2, x2, x6"), + Q!(" adcs " "x3, x3, xzr"), + Q!(" adcs " "x4, x4, xzr"), + Q!(" orr " "x5, x5, #0x8000000000000000"), + Q!(" adcs " "x5, x5, xzr"), + Q!(" csel " "x6, x7, xzr, cc"), + Q!(" subs " "x2, x2, x6"), + Q!(" sbcs " "x3, x3, xzr"), + Q!(" sbcs " "x4, x4, xzr"), + Q!(" sbc " "x5, x5, xzr"), + Q!(" and " "x5, x5, #0x7fffffffffffffff"), + Q!(" stp " "x2, x3, [sp, #32]"), + Q!(" stp " "x4, x5, [sp, #48]"), + Q!(" stp " "xzr, xzr, [sp, #64]"), + Q!(" stp " "xzr, xzr, [sp, #80]"), + Q!(" mov " "x10, #0x2099"), + Q!(" movk " "x10, #0x7502, lsl #16"), + Q!(" movk " "x10, #0x9e23, lsl #32"), + Q!(" movk " "x10, #0xa0f9, lsl #48"), + Q!(" mov " "x11, #0x2595"), + Q!(" movk " "x11, #0x1d13, lsl #16"), + Q!(" movk " "x11, #0x8f3f, lsl #32"), + Q!(" movk " "x11, #0xa8c6, lsl #48"), + Q!(" mov " "x12, #0x5242"), + Q!(" movk " "x12, #0x5ac, lsl #16"), + Q!(" movk " "x12, #0x8938, lsl #32"), + Q!(" movk " "x12, #0x6c6c, lsl #48"), + Q!(" mov " "x13, #0x615"), + Q!(" movk " "x13, #0x4177, lsl #16"), + Q!(" movk " "x13, #0x8b2, lsl #32"), + Q!(" movk " "x13, #0x2765, lsl #48"), + Q!(" stp " "x10, x11, [sp, #96]"), + Q!(" stp " "x12, x13, [sp, #112]"), + Q!(" mov " "x21, #0xa"), + Q!(" mov " "x22, #0x1"), + Q!(" b " Label!("edwards25519_scalarmulbase_alt_invmidloop", 3, After)), + Q!(Label!("edwards25519_scalarmulbase_alt_invloop", 4) ":"), + Q!(" cmp " "x10, xzr"), + Q!(" csetm " "x14, mi"), + Q!(" cneg " "x10, x10, mi"), + Q!(" cmp " "x11, xzr"), + Q!(" csetm " "x15, mi"), + Q!(" cneg " "x11, x11, mi"), + Q!(" cmp " "x12, xzr"), + Q!(" csetm " "x16, mi"), + Q!(" cneg " "x12, x12, mi"), + 
Q!(" cmp " "x13, xzr"), + Q!(" csetm " "x17, mi"), + Q!(" cneg " "x13, x13, mi"), + Q!(" and " "x0, x10, x14"), + Q!(" and " "x1, x11, x15"), + Q!(" add " "x9, x0, x1"), + Q!(" and " "x0, x12, x16"), + Q!(" and " "x1, x13, x17"), + Q!(" add " "x19, x0, x1"), + Q!(" ldr " "x7, [sp]"), + Q!(" eor " "x1, x7, x14"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x4, x9, x0"), + Q!(" adc " "x2, xzr, x1"), + Q!(" ldr " "x8, [sp, #32]"), + Q!(" eor " "x1, x8, x15"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x4, x4, x0"), + Q!(" adc " "x2, x2, x1"), + Q!(" eor " "x1, x7, x16"), + Q!(" mul " "x0, x1, x12"), + Q!(" umulh " "x1, x1, x12"), + Q!(" adds " "x5, x19, x0"), + Q!(" adc " "x3, xzr, x1"), + Q!(" eor " "x1, x8, x17"), + Q!(" mul " "x0, x1, x13"), + Q!(" umulh " "x1, x1, x13"), + Q!(" adds " "x5, x5, x0"), + Q!(" adc " "x3, x3, x1"), + Q!(" ldr " "x7, [sp, #8]"), + Q!(" eor " "x1, x7, x14"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x2, x2, x0"), + Q!(" adc " "x6, xzr, x1"), + Q!(" ldr " "x8, [sp, #40]"), + Q!(" eor " "x1, x8, x15"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x2, x2, x0"), + Q!(" adc " "x6, x6, x1"), + Q!(" extr " "x4, x2, x4, #59"), + Q!(" str " "x4, [sp]"), + Q!(" eor " "x1, x7, x16"), + Q!(" mul " "x0, x1, x12"), + Q!(" umulh " "x1, x1, x12"), + Q!(" adds " "x3, x3, x0"), + Q!(" adc " "x4, xzr, x1"), + Q!(" eor " "x1, x8, x17"), + Q!(" mul " "x0, x1, x13"), + Q!(" umulh " "x1, x1, x13"), + Q!(" adds " "x3, x3, x0"), + Q!(" adc " "x4, x4, x1"), + Q!(" extr " "x5, x3, x5, #59"), + Q!(" str " "x5, [sp, #32]"), + Q!(" ldr " "x7, [sp, #16]"), + Q!(" eor " "x1, x7, x14"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x6, x6, x0"), + Q!(" adc " "x5, xzr, x1"), + Q!(" ldr " "x8, [sp, #48]"), + Q!(" eor " "x1, x8, x15"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x6, x6, 
x0"), + Q!(" adc " "x5, x5, x1"), + Q!(" extr " "x2, x6, x2, #59"), + Q!(" str " "x2, [sp, #8]"), + Q!(" eor " "x1, x7, x16"), + Q!(" mul " "x0, x1, x12"), + Q!(" umulh " "x1, x1, x12"), + Q!(" adds " "x4, x4, x0"), + Q!(" adc " "x2, xzr, x1"), + Q!(" eor " "x1, x8, x17"), + Q!(" mul " "x0, x1, x13"), + Q!(" umulh " "x1, x1, x13"), + Q!(" adds " "x4, x4, x0"), + Q!(" adc " "x2, x2, x1"), + Q!(" extr " "x3, x4, x3, #59"), + Q!(" str " "x3, [sp, #40]"), + Q!(" ldr " "x7, [sp, #24]"), + Q!(" eor " "x1, x7, x14"), + Q!(" asr " "x3, x1, #63"), + Q!(" and " "x3, x3, x10"), + Q!(" neg " "x3, x3"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x5, x5, x0"), + Q!(" adc " "x3, x3, x1"), + Q!(" ldr " "x8, [sp, #56]"), + Q!(" eor " "x1, x8, x15"), + Q!(" asr " "x0, x1, #63"), + Q!(" and " "x0, x0, x11"), + Q!(" sub " "x3, x3, x0"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x5, x5, x0"), + Q!(" adc " "x3, x3, x1"), + Q!(" extr " "x6, x5, x6, #59"), + Q!(" str " "x6, [sp, #16]"), + Q!(" extr " "x5, x3, x5, #59"), + Q!(" str " "x5, [sp, #24]"), + Q!(" eor " "x1, x7, x16"), + Q!(" asr " "x5, x1, #63"), + Q!(" and " "x5, x5, x12"), + Q!(" neg " "x5, x5"), + Q!(" mul " "x0, x1, x12"), + Q!(" umulh " "x1, x1, x12"), + Q!(" adds " "x2, x2, x0"), + Q!(" adc " "x5, x5, x1"), + Q!(" eor " "x1, x8, x17"), + Q!(" asr " "x0, x1, #63"), + Q!(" and " "x0, x0, x13"), + Q!(" sub " "x5, x5, x0"), + Q!(" mul " "x0, x1, x13"), + Q!(" umulh " "x1, x1, x13"), + Q!(" adds " "x2, x2, x0"), + Q!(" adc " "x5, x5, x1"), + Q!(" extr " "x4, x2, x4, #59"), + Q!(" str " "x4, [sp, #48]"), + Q!(" extr " "x2, x5, x2, #59"), + Q!(" str " "x2, [sp, #56]"), + Q!(" ldr " "x7, [sp, #64]"), + Q!(" eor " "x1, x7, x14"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x4, x9, x0"), + Q!(" adc " "x2, xzr, x1"), + Q!(" ldr " "x8, [sp, #96]"), + Q!(" eor " "x1, x8, x15"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + 
Q!(" adds " "x4, x4, x0"), + Q!(" str " "x4, [sp, #64]"), + Q!(" adc " "x2, x2, x1"), + Q!(" eor " "x1, x7, x16"), + Q!(" mul " "x0, x1, x12"), + Q!(" umulh " "x1, x1, x12"), + Q!(" adds " "x5, x19, x0"), + Q!(" adc " "x3, xzr, x1"), + Q!(" eor " "x1, x8, x17"), + Q!(" mul " "x0, x1, x13"), + Q!(" umulh " "x1, x1, x13"), + Q!(" adds " "x5, x5, x0"), + Q!(" str " "x5, [sp, #96]"), + Q!(" adc " "x3, x3, x1"), + Q!(" ldr " "x7, [sp, #72]"), + Q!(" eor " "x1, x7, x14"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x2, x2, x0"), + Q!(" adc " "x6, xzr, x1"), + Q!(" ldr " "x8, [sp, #104]"), + Q!(" eor " "x1, x8, x15"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x2, x2, x0"), + Q!(" str " "x2, [sp, #72]"), + Q!(" adc " "x6, x6, x1"), + Q!(" eor " "x1, x7, x16"), + Q!(" mul " "x0, x1, x12"), + Q!(" umulh " "x1, x1, x12"), + Q!(" adds " "x3, x3, x0"), + Q!(" adc " "x4, xzr, x1"), + Q!(" eor " "x1, x8, x17"), + Q!(" mul " "x0, x1, x13"), + Q!(" umulh " "x1, x1, x13"), + Q!(" adds " "x3, x3, x0"), + Q!(" str " "x3, [sp, #104]"), + Q!(" adc " "x4, x4, x1"), + Q!(" ldr " "x7, [sp, #80]"), + Q!(" eor " "x1, x7, x14"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x6, x6, x0"), + Q!(" adc " "x5, xzr, x1"), + Q!(" ldr " "x8, [sp, #112]"), + Q!(" eor " "x1, x8, x15"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x6, x6, x0"), + Q!(" str " "x6, [sp, #80]"), + Q!(" adc " "x5, x5, x1"), + Q!(" eor " "x1, x7, x16"), + Q!(" mul " "x0, x1, x12"), + Q!(" umulh " "x1, x1, x12"), + Q!(" adds " "x4, x4, x0"), + Q!(" adc " "x2, xzr, x1"), + Q!(" eor " "x1, x8, x17"), + Q!(" mul " "x0, x1, x13"), + Q!(" umulh " "x1, x1, x13"), + Q!(" adds " "x4, x4, x0"), + Q!(" str " "x4, [sp, #112]"), + Q!(" adc " "x2, x2, x1"), + Q!(" ldr " "x7, [sp, #88]"), + Q!(" eor " "x1, x7, x14"), + Q!(" and " "x3, x14, x10"), + Q!(" neg " "x3, x3"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, 
x1, x10"), + Q!(" adds " "x5, x5, x0"), + Q!(" adc " "x3, x3, x1"), + Q!(" ldr " "x8, [sp, #120]"), + Q!(" eor " "x1, x8, x15"), + Q!(" and " "x0, x15, x11"), + Q!(" sub " "x3, x3, x0"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x5, x5, x0"), + Q!(" adc " "x3, x3, x1"), + Q!(" extr " "x6, x3, x5, #63"), + Q!(" ldp " "x0, x1, [sp, #64]"), + Q!(" add " "x6, x6, x3, asr #63"), + Q!(" mov " "x3, #0x13"), + Q!(" mul " "x4, x6, x3"), + Q!(" add " "x5, x5, x6, lsl #63"), + Q!(" smulh " "x3, x6, x3"), + Q!(" ldr " "x6, [sp, #80]"), + Q!(" adds " "x0, x0, x4"), + Q!(" adcs " "x1, x1, x3"), + Q!(" asr " "x3, x3, #63"), + Q!(" adcs " "x6, x6, x3"), + Q!(" adc " "x5, x5, x3"), + Q!(" stp " "x0, x1, [sp, #64]"), + Q!(" stp " "x6, x5, [sp, #80]"), + Q!(" eor " "x1, x7, x16"), + Q!(" and " "x5, x16, x12"), + Q!(" neg " "x5, x5"), + Q!(" mul " "x0, x1, x12"), + Q!(" umulh " "x1, x1, x12"), + Q!(" adds " "x2, x2, x0"), + Q!(" adc " "x5, x5, x1"), + Q!(" eor " "x1, x8, x17"), + Q!(" and " "x0, x17, x13"), + Q!(" sub " "x5, x5, x0"), + Q!(" mul " "x0, x1, x13"), + Q!(" umulh " "x1, x1, x13"), + Q!(" adds " "x2, x2, x0"), + Q!(" adc " "x5, x5, x1"), + Q!(" extr " "x6, x5, x2, #63"), + Q!(" ldp " "x0, x1, [sp, #96]"), + Q!(" add " "x6, x6, x5, asr #63"), + Q!(" mov " "x5, #0x13"), + Q!(" mul " "x4, x6, x5"), + Q!(" add " "x2, x2, x6, lsl #63"), + Q!(" smulh " "x5, x6, x5"), + Q!(" ldr " "x3, [sp, #112]"), + Q!(" adds " "x0, x0, x4"), + Q!(" adcs " "x1, x1, x5"), + Q!(" asr " "x5, x5, #63"), + Q!(" adcs " "x3, x3, x5"), + Q!(" adc " "x2, x2, x5"), + Q!(" stp " "x0, x1, [sp, #96]"), + Q!(" stp " "x3, x2, [sp, #112]"), + Q!(Label!("edwards25519_scalarmulbase_alt_invmidloop", 3) ":"), + Q!(" mov " "x1, x22"), + Q!(" ldr " "x2, [sp]"), + Q!(" ldr " "x3, [sp, #32]"), + Q!(" and " "x4, x2, #0xfffff"), + Q!(" orr " "x4, x4, #0xfffffe0000000000"), + Q!(" and " "x5, x3, #0xfffff"), + Q!(" orr " "x5, x5, #0xc000000000000000"), + Q!(" tst " "x5, #0x1"), + Q!(" csel 
" "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + 
Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), 
+ Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" add " "x8, x4, #0x100, lsl #12"), + Q!(" sbfx " "x8, x8, #21, #21"), + Q!(" mov " "x11, #0x100000"), + Q!(" add " "x11, x11, x11, lsl #21"), + Q!(" add " "x9, x4, x11"), + Q!(" asr " "x9, x9, #42"), + Q!(" add " "x10, x5, #0x100, lsl #12"), + Q!(" sbfx " "x10, x10, #21, #21"), + Q!(" add " "x11, x5, x11"), + Q!(" asr " "x11, x11, #42"), + Q!(" mul " "x6, x8, x2"), + Q!(" mul " "x7, x9, x3"), + Q!(" mul " "x2, x10, x2"), + Q!(" mul " "x3, x11, x3"), + Q!(" add " "x4, x6, x7"), + Q!(" add " "x5, x2, x3"), + Q!(" asr " "x2, x4, #20"), + Q!(" asr " "x3, x5, #20"), + 
Q!(" and " "x4, x2, #0xfffff"), + Q!(" orr " "x4, x4, #0xfffffe0000000000"), + Q!(" and " "x5, x3, #0xfffff"), + Q!(" orr " "x5, x5, #0xc000000000000000"), + Q!(" tst " "x5, #0x1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, 
x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, 
#1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" add " "x12, x4, #0x100, lsl #12"), + Q!(" sbfx " "x12, x12, #21, #21"), + Q!(" mov " "x15, #0x100000"), + Q!(" add " "x15, x15, x15, lsl #21"), + Q!(" add " "x13, x4, x15"), + Q!(" asr " "x13, x13, #42"), + Q!(" add " "x14, x5, #0x100, lsl #12"), + Q!(" sbfx " "x14, x14, #21, #21"), + Q!(" add " "x15, x5, x15"), + Q!(" asr " "x15, x15, #42"), + Q!(" mul " "x6, x12, x2"), 
+ Q!(" mul " "x7, x13, x3"), + Q!(" mul " "x2, x14, x2"), + Q!(" mul " "x3, x15, x3"), + Q!(" add " "x4, x6, x7"), + Q!(" add " "x5, x2, x3"), + Q!(" asr " "x2, x4, #20"), + Q!(" asr " "x3, x5, #20"), + Q!(" and " "x4, x2, #0xfffff"), + Q!(" orr " "x4, x4, #0xfffffe0000000000"), + Q!(" and " "x5, x3, #0xfffff"), + Q!(" orr " "x5, x5, #0xc000000000000000"), + Q!(" tst " "x5, #0x1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + 
Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" mul " "x2, x12, x8"), + Q!(" mul " "x3, x12, x9"), + Q!(" mul " "x6, x14, x8"), + Q!(" mul " "x7, x14, x9"), + Q!(" madd " "x8, x13, x10, x2"), + Q!(" madd " "x9, x13, x11, x3"), + Q!(" madd " "x16, x15, x10, x6"), + Q!(" madd " "x17, x15, x11, x7"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" 
cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" add " "x12, x4, #0x100, lsl #12"), + Q!(" sbfx " "x12, x12, #22, #21"), + Q!(" mov " "x15, #0x100000"), + Q!(" add " "x15, x15, x15, lsl #21"), + Q!(" add " "x13, x4, x15"), + Q!(" asr 
" "x13, x13, #43"), + Q!(" add " "x14, x5, #0x100, lsl #12"), + Q!(" sbfx " "x14, x14, #22, #21"), + Q!(" add " "x15, x5, x15"), + Q!(" asr " "x15, x15, #43"), + Q!(" mneg " "x2, x12, x8"), + Q!(" mneg " "x3, x12, x9"), + Q!(" mneg " "x4, x14, x8"), + Q!(" mneg " "x5, x14, x9"), + Q!(" msub " "x10, x13, x16, x2"), + Q!(" msub " "x11, x13, x17, x3"), + Q!(" msub " "x12, x15, x16, x4"), + Q!(" msub " "x13, x15, x17, x5"), + Q!(" mov " "x22, x1"), + Q!(" subs " "x21, x21, #0x1"), + Q!(" b.ne " Label!("edwards25519_scalarmulbase_alt_invloop", 4, Before)), + Q!(" ldr " "x0, [sp]"), + Q!(" ldr " "x1, [sp, #32]"), + Q!(" mul " "x0, x0, x10"), + Q!(" madd " "x1, x1, x11, x0"), + Q!(" asr " "x0, x1, #63"), + Q!(" cmp " "x10, xzr"), + Q!(" csetm " "x14, mi"), + Q!(" cneg " "x10, x10, mi"), + Q!(" eor " "x14, x14, x0"), + Q!(" cmp " "x11, xzr"), + Q!(" csetm " "x15, mi"), + Q!(" cneg " "x11, x11, mi"), + Q!(" eor " "x15, x15, x0"), + Q!(" cmp " "x12, xzr"), + Q!(" csetm " "x16, mi"), + Q!(" cneg " "x12, x12, mi"), + Q!(" eor " "x16, x16, x0"), + Q!(" cmp " "x13, xzr"), + Q!(" csetm " "x17, mi"), + Q!(" cneg " "x13, x13, mi"), + Q!(" eor " "x17, x17, x0"), + Q!(" and " "x0, x10, x14"), + Q!(" and " "x1, x11, x15"), + Q!(" add " "x9, x0, x1"), + Q!(" ldr " "x7, [sp, #64]"), + Q!(" eor " "x1, x7, x14"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x4, x9, x0"), + Q!(" adc " "x2, xzr, x1"), + Q!(" ldr " "x8, [sp, #96]"), + Q!(" eor " "x1, x8, x15"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x4, x4, x0"), + Q!(" str " "x4, [sp, #64]"), + Q!(" adc " "x2, x2, x1"), + Q!(" ldr " "x7, [sp, #72]"), + Q!(" eor " "x1, x7, x14"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x2, x2, x0"), + Q!(" adc " "x6, xzr, x1"), + Q!(" ldr " "x8, [sp, #104]"), + Q!(" eor " "x1, x8, x15"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x2, x2, x0"), + Q!(" str " "x2, [sp, #72]"), + 
Q!(" adc " "x6, x6, x1"), + Q!(" ldr " "x7, [sp, #80]"), + Q!(" eor " "x1, x7, x14"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x6, x6, x0"), + Q!(" adc " "x5, xzr, x1"), + Q!(" ldr " "x8, [sp, #112]"), + Q!(" eor " "x1, x8, x15"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x6, x6, x0"), + Q!(" str " "x6, [sp, #80]"), + Q!(" adc " "x5, x5, x1"), + Q!(" ldr " "x7, [sp, #88]"), + Q!(" eor " "x1, x7, x14"), + Q!(" and " "x3, x14, x10"), + Q!(" neg " "x3, x3"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x5, x5, x0"), + Q!(" adc " "x3, x3, x1"), + Q!(" ldr " "x8, [sp, #120]"), + Q!(" eor " "x1, x8, x15"), + Q!(" and " "x0, x15, x11"), + Q!(" sub " "x3, x3, x0"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x5, x5, x0"), + Q!(" adc " "x3, x3, x1"), + Q!(" extr " "x6, x3, x5, #63"), + Q!(" ldp " "x0, x1, [sp, #64]"), + Q!(" tst " "x3, x3"), + Q!(" cinc " "x6, x6, pl"), + Q!(" mov " "x3, #0x13"), + Q!(" mul " "x4, x6, x3"), + Q!(" add " "x5, x5, x6, lsl #63"), + Q!(" smulh " "x6, x6, x3"), + Q!(" ldr " "x2, [sp, #80]"), + Q!(" adds " "x0, x0, x4"), + Q!(" adcs " "x1, x1, x6"), + Q!(" asr " "x6, x6, #63"), + Q!(" adcs " "x2, x2, x6"), + Q!(" adcs " "x5, x5, x6"), + Q!(" csel " "x3, x3, xzr, mi"), + Q!(" subs " "x0, x0, x3"), + Q!(" sbcs " "x1, x1, xzr"), + Q!(" sbcs " "x2, x2, xzr"), + Q!(" sbc " "x5, x5, xzr"), + Q!(" and " "x5, x5, #0x7fffffffffffffff"), + Q!(" mov " "x4, x20"), + Q!(" stp " "x0, x1, [x4]"), + Q!(" stp " "x2, x5, [x4, #16]"), + + // The final result is x = X * inv(Z), y = Y * inv(Z). + // These are the only operations in the whole computation that + // fully reduce modulo p_25519 since now we want the canonical + // answer as output. 
+ + mul_p25519!(resx!(), x_3!(), w_3!()), + mul_p25519!(resy!(), y_3!(), w_3!()), + + // Restore stack and registers + + Q!(" add " "sp, sp, # " NSPACE!()), + Q!(" ldp " "x23, x24, [sp], 16"), + Q!(" ldp " "x21, x22, [sp], 16"), + Q!(" ldp " "x19, x20, [sp], 16"), + + inout("x0") res.as_mut_ptr() => _, + inout("x1") scalar.as_ptr() => _, + edwards25519_scalarmulbase_alt_edwards25519_0g = sym edwards25519_scalarmulbase_alt_edwards25519_0g, + edwards25519_scalarmulbase_alt_edwards25519_251g = sym edwards25519_scalarmulbase_alt_edwards25519_251g, + edwards25519_scalarmulbase_alt_edwards25519_gtable = sym edwards25519_scalarmulbase_alt_edwards25519_gtable, + // clobbers + out("x10") _, + out("x11") _, + out("x12") _, + out("x13") _, + out("x14") _, + out("x15") _, + out("x16") _, + out("x17") _, + out("x2") _, + out("x20") _, + out("x21") _, + out("x22") _, + out("x23") _, + out("x24") _, + out("x3") _, + out("x4") _, + out("x5") _, + out("x6") _, + out("x7") _, + out("x8") _, + out("x9") _, + ) + }; +} + +// **************************************************************************** +// The precomputed data (all read-only). This is currently part of the same +// text section, which gives position-independent code with simple PC-relative +// addressing. However it could be put in a separate section via something like +// +// .section .rodata +// **************************************************************************** + +// 0 * B = 0 and 2^251 * B in extended-projective coordinates +// but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. 
+ +#[allow(dead_code)] +#[repr(align(4096))] +struct PageAlignedu64Array12([u64; 12]); + +static edwards25519_scalarmulbase_alt_edwards25519_0g: PageAlignedu64Array12 = + PageAlignedu64Array12([ + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000001, + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + ]); + +static edwards25519_scalarmulbase_alt_edwards25519_251g: PageAlignedu64Array12 = + PageAlignedu64Array12([ + 0x525f946d7c7220e7, + 0x4636b0b2f1e35444, + 0x796e9d70e892ae0f, + 0x03dec05fa937adb1, + 0x6d1c271cc6375515, + 0x462588c4a4ca4f14, + 0x691129fee55afc39, + 0x15949f784d8472f5, + 0xbd89e510afad0049, + 0x4d1f08c073b9860e, + 0x07716e8b2d00af9d, + 0x70d685f68f859714, + // Precomputed table of multiples of generator for edwards25519 + // all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. + ]); + +#[allow(dead_code)] +#[repr(align(4096))] +struct PageAlignedu64Array6048([u64; 6048]); + +static edwards25519_scalarmulbase_alt_edwards25519_gtable: PageAlignedu64Array6048 = + PageAlignedu64Array6048([ + // 2^0 * 1 * G + 0x9d103905d740913e, + 0xfd399f05d140beb3, + 0xa5c18434688f8a09, + 0x44fd2f9298f81267, + 0x2fbc93c6f58c3b85, + 0xcf932dc6fb8c0e19, + 0x270b4898643d42c2, + 0x07cf9d3a33d4ba65, + 0xabc91205877aaa68, + 0x26d9e823ccaac49e, + 0x5a1b7dcbdd43598c, + 0x6f117b689f0c65a8, + // 2^0 * 2 * G + 0x8a99a56042b4d5a8, + 0x8f2b810c4e60acf6, + 0xe09e236bb16e37aa, + 0x6bb595a669c92555, + 0x9224e7fc933c71d7, + 0x9f469d967a0ff5b5, + 0x5aa69a65e1d60702, + 0x590c063fa87d2e2e, + 0x43faa8b3a59b7a5f, + 0x36c16bdd5d9acf78, + 0x500fa0840b3d6a31, + 0x701af5b13ea50b73, + // 2^0 * 3 * G + 0x56611fe8a4fcd265, + 0x3bd353fde5c1ba7d, + 0x8131f31a214bd6bd, + 0x2ab91587555bda62, + 0xaf25b0a84cee9730, + 0x025a8430e8864b8a, + 0xc11b50029f016732, + 0x7a164e1b9a80f8f4, + 0x14ae933f0dd0d889, + 0x589423221c35da62, + 
0xd170e5458cf2db4c, + 0x5a2826af12b9b4c6, + // 2^0 * 4 * G + 0x95fe050a056818bf, + 0x327e89715660faa9, + 0xc3e8e3cd06a05073, + 0x27933f4c7445a49a, + 0x287351b98efc099f, + 0x6765c6f47dfd2538, + 0xca348d3dfb0a9265, + 0x680e910321e58727, + 0x5a13fbe9c476ff09, + 0x6e9e39457b5cc172, + 0x5ddbdcf9102b4494, + 0x7f9d0cbf63553e2b, + // 2^0 * 5 * G + 0x7f9182c3a447d6ba, + 0xd50014d14b2729b7, + 0xe33cf11cb864a087, + 0x154a7e73eb1b55f3, + 0xa212bc4408a5bb33, + 0x8d5048c3c75eed02, + 0xdd1beb0c5abfec44, + 0x2945ccf146e206eb, + 0xbcbbdbf1812a8285, + 0x270e0807d0bdd1fc, + 0xb41b670b1bbda72d, + 0x43aabe696b3bb69a, + // 2^0 * 6 * G + 0x499806b67b7d8ca4, + 0x575be28427d22739, + 0xbb085ce7204553b9, + 0x38b64c41ae417884, + 0x3a0ceeeb77157131, + 0x9b27158900c8af88, + 0x8065b668da59a736, + 0x51e57bb6a2cc38bd, + 0x85ac326702ea4b71, + 0xbe70e00341a1bb01, + 0x53e4a24b083bc144, + 0x10b8e91a9f0d61e3, + // 2^0 * 7 * G + 0xba6f2c9aaa3221b1, + 0x6ca021533bba23a7, + 0x9dea764f92192c3a, + 0x1d6edd5d2e5317e0, + 0x6b1a5cd0944ea3bf, + 0x7470353ab39dc0d2, + 0x71b2528228542e49, + 0x461bea69283c927e, + 0xf1836dc801b8b3a2, + 0xb3035f47053ea49a, + 0x529c41ba5877adf3, + 0x7a9fbb1c6a0f90a7, + // 2^0 * 8 * G + 0xe2a75dedf39234d9, + 0x963d7680e1b558f9, + 0x2c2741ac6e3c23fb, + 0x3a9024a1320e01c3, + 0x59b7596604dd3e8f, + 0x6cb30377e288702c, + 0xb1339c665ed9c323, + 0x0915e76061bce52f, + 0xe7c1f5d9c9a2911a, + 0xb8a371788bcca7d7, + 0x636412190eb62a32, + 0x26907c5c2ecc4e95, + // 2^4 * 1 * B + 0x7ec851ca553e2df3, + 0xa71284cba64878b3, + 0xe6b5e4193288d1e7, + 0x4cf210ec5a9a8883, + 0x322d04a52d9021f6, + 0xb9c19f3375c6bf9c, + 0x587a3a4342d20b09, + 0x143b1cf8aa64fe61, + 0x9f867c7d968acaab, + 0x5f54258e27092729, + 0xd0a7d34bea180975, + 0x21b546a3374126e1, + // 2^4 * 2 * B + 0xa94ff858a2888343, + 0xce0ed4565313ed3c, + 0xf55c3dcfb5bf34fa, + 0x0a653ca5c9eab371, + 0x490a7a45d185218f, + 0x9a15377846049335, + 0x0060ea09cc31e1f6, + 0x7e041577f86ee965, + 0x66b2a496ce5b67f3, + 0xff5492d8bd569796, + 0x503cec294a592cd0, + 
0x566943650813acb2, + // 2^4 * 3 * B + 0xb818db0c26620798, + 0x5d5c31d9606e354a, + 0x0982fa4f00a8cdc7, + 0x17e12bcd4653e2d4, + 0x5672f9eb1dabb69d, + 0xba70b535afe853fc, + 0x47ac0f752796d66d, + 0x32a5351794117275, + 0xd3a644a6df648437, + 0x703b6559880fbfdd, + 0xcb852540ad3a1aa5, + 0x0900b3f78e4c6468, + // 2^4 * 4 * B + 0x0a851b9f679d651b, + 0xe108cb61033342f2, + 0xd601f57fe88b30a3, + 0x371f3acaed2dd714, + 0xed280fbec816ad31, + 0x52d9595bd8e6efe3, + 0x0fe71772f6c623f5, + 0x4314030b051e293c, + 0xd560005efbf0bcad, + 0x8eb70f2ed1870c5e, + 0x201f9033d084e6a0, + 0x4c3a5ae1ce7b6670, + // 2^4 * 5 * B + 0x4138a434dcb8fa95, + 0x870cf67d6c96840b, + 0xde388574297be82c, + 0x7c814db27262a55a, + 0xbaf875e4c93da0dd, + 0xb93282a771b9294d, + 0x80d63fb7f4c6c460, + 0x6de9c73dea66c181, + 0x478904d5a04df8f2, + 0xfafbae4ab10142d3, + 0xf6c8ac63555d0998, + 0x5aac4a412f90b104, + // 2^4 * 6 * B + 0xc64f326b3ac92908, + 0x5551b282e663e1e0, + 0x476b35f54a1a4b83, + 0x1b9da3fe189f68c2, + 0x603a0d0abd7f5134, + 0x8089c932e1d3ae46, + 0xdf2591398798bd63, + 0x1c145cd274ba0235, + 0x32e8386475f3d743, + 0x365b8baf6ae5d9ef, + 0x825238b6385b681e, + 0x234929c1167d65e1, + // 2^4 * 7 * B + 0x984decaba077ade8, + 0x383f77ad19eb389d, + 0xc7ec6b7e2954d794, + 0x59c77b3aeb7c3a7a, + 0x48145cc21d099fcf, + 0x4535c192cc28d7e5, + 0x80e7c1e548247e01, + 0x4a5f28743b2973ee, + 0xd3add725225ccf62, + 0x911a3381b2152c5d, + 0xd8b39fad5b08f87d, + 0x6f05606b4799fe3b, + // 2^4 * 8 * B + 0x9ffe9e92177ba962, + 0x98aee71d0de5cae1, + 0x3ff4ae942d831044, + 0x714de12e58533ac8, + 0x5b433149f91b6483, + 0xadb5dc655a2cbf62, + 0x87fa8412632827b3, + 0x60895e91ab49f8d8, + 0xe9ecf2ed0cf86c18, + 0xb46d06120735dfd4, + 0xbc9da09804b96be7, + 0x73e2e62fd96dc26b, + // 2^8 * 1 * B + 0xed5b635449aa515e, + 0xa865c49f0bc6823a, + 0x850c1fe95b42d1c4, + 0x30d76d6f03d315b9, + 0x2eccdd0e632f9c1d, + 0x51d0b69676893115, + 0x52dfb76ba8637a58, + 0x6dd37d49a00eef39, + 0x6c4444172106e4c7, + 0xfb53d680928d7f69, + 0xb4739ea4694d3f26, + 0x10c697112e864bb0, + // 2^8 * 2 
* B + 0x6493c4277dbe5fde, + 0x265d4fad19ad7ea2, + 0x0e00dfc846304590, + 0x25e61cabed66fe09, + 0x0ca62aa08358c805, + 0x6a3d4ae37a204247, + 0x7464d3a63b11eddc, + 0x03bf9baf550806ef, + 0x3f13e128cc586604, + 0x6f5873ecb459747e, + 0xa0b63dedcc1268f5, + 0x566d78634586e22c, + // 2^8 * 3 * B + 0x1637a49f9cc10834, + 0xbc8e56d5a89bc451, + 0x1cb5ec0f7f7fd2db, + 0x33975bca5ecc35d9, + 0xa1054285c65a2fd0, + 0x6c64112af31667c3, + 0x680ae240731aee58, + 0x14fba5f34793b22a, + 0x3cd746166985f7d4, + 0x593e5e84c9c80057, + 0x2fc3f2b67b61131e, + 0x14829cea83fc526c, + // 2^8 * 4 * B + 0xff437b8497dd95c2, + 0x6c744e30aa4eb5a7, + 0x9e0c5d613c85e88b, + 0x2fd9c71e5f758173, + 0x21e70b2f4e71ecb8, + 0xe656ddb940a477e3, + 0xbf6556cece1d4f80, + 0x05fc3bc4535d7b7e, + 0x24b8b3ae52afdedd, + 0x3495638ced3b30cf, + 0x33a4bc83a9be8195, + 0x373767475c651f04, + // 2^8 * 5 * B + 0x2fba99fd40d1add9, + 0xb307166f96f4d027, + 0x4363f05215f03bae, + 0x1fbea56c3b18f999, + 0x634095cb14246590, + 0xef12144016c15535, + 0x9e38140c8910bc60, + 0x6bf5905730907c8c, + 0x0fa778f1e1415b8a, + 0x06409ff7bac3a77e, + 0x6f52d7b89aa29a50, + 0x02521cf67a635a56, + // 2^8 * 6 * B + 0x513fee0b0a9d5294, + 0x8f98e75c0fdf5a66, + 0xd4618688bfe107ce, + 0x3fa00a7e71382ced, + 0xb1146720772f5ee4, + 0xe8f894b196079ace, + 0x4af8224d00ac824a, + 0x001753d9f7cd6cc4, + 0x3c69232d963ddb34, + 0x1dde87dab4973858, + 0xaad7d1f9a091f285, + 0x12b5fe2fa048edb6, + // 2^8 * 7 * B + 0x71f0fbc496fce34d, + 0x73b9826badf35bed, + 0xd2047261ff28c561, + 0x749b76f96fb1206f, + 0xdf2b7c26ad6f1e92, + 0x4b66d323504b8913, + 0x8c409dc0751c8bc3, + 0x6f7e93c20796c7b8, + 0x1f5af604aea6ae05, + 0xc12351f1bee49c99, + 0x61a808b5eeff6b66, + 0x0fcec10f01e02151, + // 2^8 * 8 * B + 0x644d58a649fe1e44, + 0x21fcaea231ad777e, + 0x02441c5a887fd0d2, + 0x4901aa7183c511f3, + 0x3df2d29dc4244e45, + 0x2b020e7493d8de0a, + 0x6cc8067e820c214d, + 0x413779166feab90a, + 0x08b1b7548c1af8f0, + 0xce0f7a7c246299b4, + 0xf760b0f91e06d939, + 0x41bb887b726d1213, + // 2^12 * 1 * B + 0x9267806c567c49d8, + 
0x066d04ccca791e6a, + 0xa69f5645e3cc394b, + 0x5c95b686a0788cd2, + 0x97d980e0aa39f7d2, + 0x35d0384252c6b51c, + 0x7d43f49307cd55aa, + 0x56bd36cfb78ac362, + 0x2ac519c10d14a954, + 0xeaf474b494b5fa90, + 0xe6af8382a9f87a5a, + 0x0dea6db1879be094, + // 2^12 * 2 * B + 0xaa66bf547344e5ab, + 0xda1258888f1b4309, + 0x5e87d2b3fd564b2f, + 0x5b2c78885483b1dd, + 0x15baeb74d6a8797a, + 0x7ef55cf1fac41732, + 0x29001f5a3c8b05c5, + 0x0ad7cc8752eaccfb, + 0x52151362793408cf, + 0xeb0f170319963d94, + 0xa833b2fa883d9466, + 0x093a7fa775003c78, + // 2^12 * 3 * B + 0xe5107de63a16d7be, + 0xa377ffdc9af332cf, + 0x70d5bf18440b677f, + 0x6a252b19a4a31403, + 0xb8e9604460a91286, + 0x7f3fd8047778d3de, + 0x67d01e31bf8a5e2d, + 0x7b038a06c27b653e, + 0x9ed919d5d36990f3, + 0x5213aebbdb4eb9f2, + 0xc708ea054cb99135, + 0x58ded57f72260e56, + // 2^12 * 4 * B + 0x78e79dade9413d77, + 0xf257f9d59729e67d, + 0x59db910ee37aa7e6, + 0x6aa11b5bbb9e039c, + 0xda6d53265b0fd48b, + 0x8960823193bfa988, + 0xd78ac93261d57e28, + 0x79f2942d3a5c8143, + 0x97da2f25b6c88de9, + 0x251ba7eaacf20169, + 0x09b44f87ef4eb4e4, + 0x7d90ab1bbc6a7da5, + // 2^12 * 5 * B + 0x9acca683a7016bfe, + 0x90505f4df2c50b6d, + 0x6b610d5fcce435aa, + 0x19a10d446198ff96, + 0x1a07a3f496b3c397, + 0x11ceaa188f4e2532, + 0x7d9498d5a7751bf0, + 0x19ed161f508dd8a0, + 0x560a2cd687dce6ca, + 0x7f3568c48664cf4d, + 0x8741e95222803a38, + 0x483bdab1595653fc, + // 2^12 * 6 * B + 0xfa780f148734fa49, + 0x106f0b70360534e0, + 0x2210776fe3e307bd, + 0x3286c109dde6a0fe, + 0xd6cf4d0ab4da80f6, + 0x82483e45f8307fe0, + 0x05005269ae6f9da4, + 0x1c7052909cf7877a, + 0x32ee7de2874e98d4, + 0x14c362e9b97e0c60, + 0x5781dcde6a60a38a, + 0x217dd5eaaa7aa840, + // 2^12 * 7 * B + 0x9db7c4d0248e1eb0, + 0xe07697e14d74bf52, + 0x1e6a9b173c562354, + 0x7fa7c21f795a4965, + 0x8bdf1fb9be8c0ec8, + 0x00bae7f8e30a0282, + 0x4963991dad6c4f6c, + 0x07058a6e5df6f60a, + 0xe9eb02c4db31f67f, + 0xed25fd8910bcfb2b, + 0x46c8131f5c5cddb4, + 0x33b21c13a0cb9bce, + // 2^12 * 8 * B + 0x360692f8087d8e31, + 0xf4dcc637d27163f7, + 
0x25a4e62065ea5963, + 0x659bf72e5ac160d9, + 0x9aafb9b05ee38c5b, + 0xbf9d2d4e071a13c7, + 0x8eee6e6de933290a, + 0x1c3bab17ae109717, + 0x1c9ab216c7cab7b0, + 0x7d65d37407bbc3cc, + 0x52744750504a58d5, + 0x09f2606b131a2990, + // 2^16 * 1 * B + 0x40e87d44744346be, + 0x1d48dad415b52b25, + 0x7c3a8a18a13b603e, + 0x4eb728c12fcdbdf7, + 0x7e234c597c6691ae, + 0x64889d3d0a85b4c8, + 0xdae2c90c354afae7, + 0x0a871e070c6a9e1d, + 0x3301b5994bbc8989, + 0x736bae3a5bdd4260, + 0x0d61ade219d59e3c, + 0x3ee7300f2685d464, + // 2^16 * 2 * B + 0xf5d255e49e7dd6b7, + 0x8016115c610b1eac, + 0x3c99975d92e187ca, + 0x13815762979125c2, + 0x43fa7947841e7518, + 0xe5c6fa59639c46d7, + 0xa1065e1de3052b74, + 0x7d47c6a2cfb89030, + 0x3fdad0148ef0d6e0, + 0x9d3e749a91546f3c, + 0x71ec621026bb8157, + 0x148cf58d34c9ec80, + // 2^16 * 3 * B + 0x46a492f67934f027, + 0x469984bef6840aa9, + 0x5ca1bc2a89611854, + 0x3ff2fa1ebd5dbbd4, + 0xe2572f7d9ae4756d, + 0x56c345bb88f3487f, + 0x9fd10b6d6960a88d, + 0x278febad4eaea1b9, + 0xb1aa681f8c933966, + 0x8c21949c20290c98, + 0x39115291219d3c52, + 0x4104dd02fe9c677b, + // 2^16 * 4 * B + 0x72b2bf5e1124422a, + 0xa1fa0c3398a33ab5, + 0x94cb6101fa52b666, + 0x2c863b00afaf53d5, + 0x81214e06db096ab8, + 0x21a8b6c90ce44f35, + 0x6524c12a409e2af5, + 0x0165b5a48efca481, + 0xf190a474a0846a76, + 0x12eff984cd2f7cc0, + 0x695e290658aa2b8f, + 0x591b67d9bffec8b8, + // 2^16 * 5 * B + 0x312f0d1c80b49bfa, + 0x5979515eabf3ec8a, + 0x727033c09ef01c88, + 0x3de02ec7ca8f7bcb, + 0x99b9b3719f18b55d, + 0xe465e5faa18c641e, + 0x61081136c29f05ed, + 0x489b4f867030128b, + 0xd232102d3aeb92ef, + 0xe16253b46116a861, + 0x3d7eabe7190baa24, + 0x49f5fbba496cbebf, + // 2^16 * 6 * B + 0x30949a108a5bcfd4, + 0xdc40dd70bc6473eb, + 0x92c294c1307c0d1c, + 0x5604a86dcbfa6e74, + 0x155d628c1e9c572e, + 0x8a4d86acc5884741, + 0x91a352f6515763eb, + 0x06a1a6c28867515b, + 0x7288d1d47c1764b6, + 0x72541140e0418b51, + 0x9f031a6018acf6d1, + 0x20989e89fe2742c6, + // 2^16 * 7 * B + 0x499777fd3a2dcc7f, + 0x32857c2ca54fd892, + 0xa279d864d207e3a0, + 
0x0403ed1d0ca67e29, + 0x1674278b85eaec2e, + 0x5621dc077acb2bdf, + 0x640a4c1661cbf45a, + 0x730b9950f70595d3, + 0xc94b2d35874ec552, + 0xc5e6c8cf98246f8d, + 0xf7cb46fa16c035ce, + 0x5bd7454308303dcc, + // 2^16 * 8 * B + 0x7f9ad19528b24cc2, + 0x7f6b54656335c181, + 0x66b8b66e4fc07236, + 0x133a78007380ad83, + 0x85c4932115e7792a, + 0xc64c89a2bdcdddc9, + 0x9d1e3da8ada3d762, + 0x5bb7db123067f82c, + 0x0961f467c6ca62be, + 0x04ec21d6211952ee, + 0x182360779bd54770, + 0x740dca6d58f0e0d2, + // 2^20 * 1 * B + 0x50b70bf5d3f0af0b, + 0x4feaf48ae32e71f7, + 0x60e84ed3a55bbd34, + 0x00ed489b3f50d1ed, + 0x3906c72aed261ae5, + 0x9ab68fd988e100f7, + 0xf5e9059af3360197, + 0x0e53dc78bf2b6d47, + 0xb90829bf7971877a, + 0x5e4444636d17e631, + 0x4d05c52e18276893, + 0x27632d9a5a4a4af5, + // 2^20 * 2 * B + 0xd11ff05154b260ce, + 0xd86dc38e72f95270, + 0x601fcd0d267cc138, + 0x2b67916429e90ccd, + 0xa98285d187eaffdb, + 0xa5b4fbbbd8d0a864, + 0xb658f27f022663f7, + 0x3bbc2b22d99ce282, + 0xb917c952583c0a58, + 0x653ff9b80fe4c6f3, + 0x9b0da7d7bcdf3c0c, + 0x43a0eeb6ab54d60e, + // 2^20 * 3 * B + 0x396966a46d4a5487, + 0xf811a18aac2bb3ba, + 0x66e4685b5628b26b, + 0x70a477029d929b92, + 0x3ac6322357875fe8, + 0xd9d4f4ecf5fbcb8f, + 0x8dee8493382bb620, + 0x50c5eaa14c799fdc, + 0xdd0edc8bd6f2fb3c, + 0x54c63aa79cc7b7a0, + 0xae0b032b2c8d9f1a, + 0x6f9ce107602967fb, + // 2^20 * 4 * B + 0xad1054b1cde1c22a, + 0xc4a8e90248eb32df, + 0x5f3e7b33accdc0ea, + 0x72364713fc79963e, + 0x139693063520e0b5, + 0x437fcf7c88ea03fe, + 0xf7d4c40bd3c959bc, + 0x699154d1f893ded9, + 0x315d5c75b4b27526, + 0xcccb842d0236daa5, + 0x22f0c8a3345fee8e, + 0x73975a617d39dbed, + // 2^20 * 5 * B + 0xe4024df96375da10, + 0x78d3251a1830c870, + 0x902b1948658cd91c, + 0x7e18b10b29b7438a, + 0x6f37f392f4433e46, + 0x0e19b9a11f566b18, + 0x220fb78a1fd1d662, + 0x362a4258a381c94d, + 0x9071d9132b6beb2f, + 0x0f26e9ad28418247, + 0xeab91ec9bdec925d, + 0x4be65bc8f48af2de, + // 2^20 * 6 * B + 0x78487feba36e7028, + 0x5f3f13001dd8ce34, + 0x934fb12d4b30c489, + 0x056c244d397f0a2b, + 
0x1d50fba257c26234, + 0x7bd4823adeb0678b, + 0xc2b0dc6ea6538af5, + 0x5665eec6351da73e, + 0xdb3ee00943bfb210, + 0x4972018720800ac2, + 0x26ab5d6173bd8667, + 0x20b209c2ab204938, + // 2^20 * 7 * B + 0x549e342ac07fb34b, + 0x02d8220821373d93, + 0xbc262d70acd1f567, + 0x7a92c9fdfbcac784, + 0x1fcca94516bd3289, + 0x448d65aa41420428, + 0x59c3b7b216a55d62, + 0x49992cc64e612cd8, + 0x65bd1bea70f801de, + 0x1befb7c0fe49e28a, + 0xa86306cdb1b2ae4a, + 0x3b7ac0cd265c2a09, + // 2^20 * 8 * B + 0x822bee438c01bcec, + 0x530cb525c0fbc73b, + 0x48519034c1953fe9, + 0x265cc261e09a0f5b, + 0xf0d54e4f22ed39a7, + 0xa2aae91e5608150a, + 0xf421b2e9eddae875, + 0x31bc531d6b7de992, + 0xdf3d134da980f971, + 0x7a4fb8d1221a22a7, + 0x3df7d42035aad6d8, + 0x2a14edcc6a1a125e, + // 2^24 * 1 * B + 0xdf48ee0752cfce4e, + 0xc3fffaf306ec08b7, + 0x05710b2ab95459c4, + 0x161d25fa963ea38d, + 0x231a8c570478433c, + 0xb7b5270ec281439d, + 0xdbaa99eae3d9079f, + 0x2c03f5256c2b03d9, + 0x790f18757b53a47d, + 0x307b0130cf0c5879, + 0x31903d77257ef7f9, + 0x699468bdbd96bbaf, + // 2^24 * 2 * B + 0xbd1f2f46f4dafecf, + 0x7cef0114a47fd6f7, + 0xd31ffdda4a47b37f, + 0x525219a473905785, + 0xd8dd3de66aa91948, + 0x485064c22fc0d2cc, + 0x9b48246634fdea2f, + 0x293e1c4e6c4a2e3a, + 0x376e134b925112e1, + 0x703778b5dca15da0, + 0xb04589af461c3111, + 0x5b605c447f032823, + // 2^24 * 3 * B + 0xb965805920c47c89, + 0xe7f0100c923b8fcc, + 0x0001256502e2ef77, + 0x24a76dcea8aeb3ee, + 0x3be9fec6f0e7f04c, + 0x866a579e75e34962, + 0x5542ef161e1de61a, + 0x2f12fef4cc5abdd5, + 0x0a4522b2dfc0c740, + 0x10d06e7f40c9a407, + 0xc6cf144178cff668, + 0x5e607b2518a43790, + // 2^24 * 4 * B + 0x58b31d8f6cdf1818, + 0x35cfa74fc36258a2, + 0xe1b3ff4f66e61d6e, + 0x5067acab6ccdd5f7, + 0xa02c431ca596cf14, + 0xe3c42d40aed3e400, + 0xd24526802e0f26db, + 0x201f33139e457068, + 0xfd527f6b08039d51, + 0x18b14964017c0006, + 0xd5220eb02e25a4a8, + 0x397cba8862460375, + // 2^24 * 5 * B + 0x30c13093f05959b2, + 0xe23aa18de9a97976, + 0x222fd491721d5e26, + 0x2339d320766e6c3a, + 0x7815c3fbc81379e7, + 
0xa6619420dde12af1, + 0xffa9c0f885a8fdd5, + 0x771b4022c1e1c252, + 0xd87dd986513a2fa7, + 0xf5ac9b71f9d4cf08, + 0xd06bc31b1ea283b3, + 0x331a189219971a76, + // 2^24 * 6 * B + 0xf5166f45fb4f80c6, + 0x9c36c7de61c775cf, + 0xe3d4e81b9041d91c, + 0x31167c6b83bdfe21, + 0x26512f3a9d7572af, + 0x5bcbe28868074a9e, + 0x84edc1c11180f7c4, + 0x1ac9619ff649a67b, + 0xf22b3842524b1068, + 0x5068343bee9ce987, + 0xfc9d71844a6250c8, + 0x612436341f08b111, + // 2^24 * 7 * B + 0xd99d41db874e898d, + 0x09fea5f16c07dc20, + 0x793d2c67d00f9bbc, + 0x46ebe2309e5eff40, + 0x8b6349e31a2d2638, + 0x9ddfb7009bd3fd35, + 0x7f8bf1b8a3a06ba4, + 0x1522aa3178d90445, + 0x2c382f5369614938, + 0xdafe409ab72d6d10, + 0xe8c83391b646f227, + 0x45fe70f50524306c, + // 2^24 * 8 * B + 0xda4875a6960c0b8c, + 0x5b68d076ef0e2f20, + 0x07fb51cf3d0b8fd4, + 0x428d1623a0e392d4, + 0x62f24920c8951491, + 0x05f007c83f630ca2, + 0x6fbb45d2f5c9d4b8, + 0x16619f6db57a2245, + 0x084f4a4401a308fd, + 0xa82219c376a5caac, + 0xdeb8de4643d1bc7d, + 0x1d81592d60bd38c6, + // 2^28 * 1 * B + 0xd833d7beec2a4c38, + 0x2c9162830acc20ed, + 0xe93a47aa92df7581, + 0x702d67a3333c4a81, + 0x3a4a369a2f89c8a1, + 0x63137a1d7c8de80d, + 0xbcac008a78eda015, + 0x2cb8b3a5b483b03f, + 0x36e417cbcb1b90a1, + 0x33b3ddaa7f11794e, + 0x3f510808885bc607, + 0x24141dc0e6a8020d, + // 2^28 * 2 * B + 0x59f73c773fefee9d, + 0xb3f1ef89c1cf989d, + 0xe35dfb42e02e545f, + 0x5766120b47a1b47c, + 0x91925dccbd83157d, + 0x3ca1205322cc8094, + 0x28e57f183f90d6e4, + 0x1a4714cede2e767b, + 0xdb20ba0fb8b6b7ff, + 0xb732c3b677511fa1, + 0xa92b51c099f02d89, + 0x4f3875ad489ca5f1, + // 2^28 * 3 * B + 0xc7fc762f4932ab22, + 0x7ac0edf72f4c3c1b, + 0x5f6b55aa9aa895e8, + 0x3680274dad0a0081, + 0x79ed13f6ee73eec0, + 0xa5c6526d69110bb1, + 0xe48928c38603860c, + 0x722a1446fd7059f5, + 0xd0959fe9a8cf8819, + 0xd0a995508475a99c, + 0x6eac173320b09cc5, + 0x628ecf04331b1095, + // 2^28 * 4 * B + 0x98bcb118a9d0ddbc, + 0xee449e3408b4802b, + 0x87089226b8a6b104, + 0x685f349a45c7915d, + 0x9b41acf85c74ccf1, + 0xb673318108265251, + 
0x99c92aed11adb147, + 0x7a47d70d34ecb40f, + 0x60a0c4cbcc43a4f5, + 0x775c66ca3677bea9, + 0xa17aa1752ff8f5ed, + 0x11ded9020e01fdc0, + // 2^28 * 5 * B + 0x890e7809caefe704, + 0x8728296de30e8c6c, + 0x4c5cd2a392aeb1c9, + 0x194263d15771531f, + 0x471f95b03bea93b7, + 0x0552d7d43313abd3, + 0xbd9370e2e17e3f7b, + 0x7b120f1db20e5bec, + 0x17d2fb3d86502d7a, + 0xb564d84450a69352, + 0x7da962c8a60ed75d, + 0x00d0f85b318736aa, + // 2^28 * 6 * B + 0x978b142e777c84fd, + 0xf402644705a8c062, + 0xa67ad51be7e612c7, + 0x2f7b459698dd6a33, + 0xa6753c1efd7621c1, + 0x69c0b4a7445671f5, + 0x971f527405b23c11, + 0x387bc74851a8c7cd, + 0x81894b4d4a52a9a8, + 0xadd93e12f6b8832f, + 0x184d8548b61bd638, + 0x3f1c62dbd6c9f6cd, + // 2^28 * 7 * B + 0x2e8f1f0091910c1f, + 0xa4df4fe0bff2e12c, + 0x60c6560aee927438, + 0x6338283facefc8fa, + 0x3fad3e40148f693d, + 0x052656e194eb9a72, + 0x2f4dcbfd184f4e2f, + 0x406f8db1c482e18b, + 0x9e630d2c7f191ee4, + 0x4fbf8301bc3ff670, + 0x787d8e4e7afb73c4, + 0x50d83d5be8f58fa5, + // 2^28 * 8 * B + 0x85683916c11a1897, + 0x2d69a4efe506d008, + 0x39af1378f664bd01, + 0x65942131361517c6, + 0xc0accf90b4d3b66d, + 0xa7059de561732e60, + 0x033d1f7870c6b0ba, + 0x584161cd26d946e4, + 0xbbf2b1a072d27ca2, + 0xbf393c59fbdec704, + 0xe98dbbcee262b81e, + 0x02eebd0b3029b589, + // 2^32 * 1 * B + 0x61368756a60dac5f, + 0x17e02f6aebabdc57, + 0x7f193f2d4cce0f7d, + 0x20234a7789ecdcf0, + 0x8765b69f7b85c5e8, + 0x6ff0678bd168bab2, + 0x3a70e77c1d330f9b, + 0x3a5f6d51b0af8e7c, + 0x76d20db67178b252, + 0x071c34f9d51ed160, + 0xf62a4a20b3e41170, + 0x7cd682353cffe366, + // 2^32 * 2 * B + 0x0be1a45bd887fab6, + 0x2a846a32ba403b6e, + 0xd9921012e96e6000, + 0x2838c8863bdc0943, + 0xa665cd6068acf4f3, + 0x42d92d183cd7e3d3, + 0x5759389d336025d9, + 0x3ef0253b2b2cd8ff, + 0xd16bb0cf4a465030, + 0xfa496b4115c577ab, + 0x82cfae8af4ab419d, + 0x21dcb8a606a82812, + // 2^32 * 3 * B + 0x5c6004468c9d9fc8, + 0x2540096ed42aa3cb, + 0x125b4d4c12ee2f9c, + 0x0bc3d08194a31dab, + 0x9a8d00fabe7731ba, + 0x8203607e629e1889, + 0xb2cc023743f3d97f, + 
0x5d840dbf6c6f678b, + 0x706e380d309fe18b, + 0x6eb02da6b9e165c7, + 0x57bbba997dae20ab, + 0x3a4276232ac196dd, + // 2^32 * 4 * B + 0x4b42432c8a7084fa, + 0x898a19e3dfb9e545, + 0xbe9f00219c58e45d, + 0x1ff177cea16debd1, + 0x3bf8c172db447ecb, + 0x5fcfc41fc6282dbd, + 0x80acffc075aa15fe, + 0x0770c9e824e1a9f9, + 0xcf61d99a45b5b5fd, + 0x860984e91b3a7924, + 0xe7300919303e3e89, + 0x39f264fd41500b1e, + // 2^32 * 5 * B + 0xa7ad3417dbe7e29c, + 0xbd94376a2b9c139c, + 0xa0e91b8e93597ba9, + 0x1712d73468889840, + 0xd19b4aabfe097be1, + 0xa46dfce1dfe01929, + 0xc3c908942ca6f1ff, + 0x65c621272c35f14e, + 0xe72b89f8ce3193dd, + 0x4d103356a125c0bb, + 0x0419a93d2e1cfe83, + 0x22f9800ab19ce272, + // 2^32 * 6 * B + 0x605a368a3e9ef8cb, + 0xe3e9c022a5504715, + 0x553d48b05f24248f, + 0x13f416cd647626e5, + 0x42029fdd9a6efdac, + 0xb912cebe34a54941, + 0x640f64b987bdf37b, + 0x4171a4d38598cab4, + 0xfa2758aa99c94c8c, + 0x23006f6fb000b807, + 0xfbd291ddadda5392, + 0x508214fa574bd1ab, + // 2^32 * 7 * B + 0xc20269153ed6fe4b, + 0xa65a6739511d77c4, + 0xcbde26462c14af94, + 0x22f960ec6faba74b, + 0x461a15bb53d003d6, + 0xb2102888bcf3c965, + 0x27c576756c683a5a, + 0x3a7758a4c86cb447, + 0x548111f693ae5076, + 0x1dae21df1dfd54a6, + 0x12248c90f3115e65, + 0x5d9fd15f8de7f494, + // 2^32 * 8 * B + 0x031408d36d63727f, + 0x6a379aefd7c7b533, + 0xa9e18fc5ccaee24b, + 0x332f35914f8fbed3, + 0x3f244d2aeed7521e, + 0x8e3a9028432e9615, + 0xe164ba772e9c16d4, + 0x3bc187fa47eb98d8, + 0x6d470115ea86c20c, + 0x998ab7cb6c46d125, + 0xd77832b53a660188, + 0x450d81ce906fba03, + // 2^36 * 1 * B + 0xf8ae4d2ad8453902, + 0x7018058ee8db2d1d, + 0xaab3995fc7d2c11e, + 0x53b16d2324ccca79, + 0x23264d66b2cae0b5, + 0x7dbaed33ebca6576, + 0x030ebed6f0d24ac8, + 0x2a887f78f7635510, + 0x2a23b9e75c012d4f, + 0x0c974651cae1f2ea, + 0x2fb63273675d70ca, + 0x0ba7250b864403f5, + // 2^36 * 2 * B + 0xbb0d18fd029c6421, + 0xbc2d142189298f02, + 0x8347f8e68b250e96, + 0x7b9f2fe8032d71c9, + 0xdd63589386f86d9c, + 0x61699176e13a85a4, + 0x2e5111954eaa7d57, + 0x32c21b57fb60bdfb, + 
0xd87823cd319e0780, + 0xefc4cfc1897775c5, + 0x4854fb129a0ab3f7, + 0x12c49d417238c371, + // 2^36 * 3 * B + 0x0950b533ffe83769, + 0x21861c1d8e1d6bd1, + 0xf022d8381302e510, + 0x2509200c6391cab4, + 0x09b3a01783799542, + 0x626dd08faad5ee3f, + 0xba00bceeeb70149f, + 0x1421b246a0a444c9, + 0x4aa43a8e8c24a7c7, + 0x04c1f540d8f05ef5, + 0xadba5e0c0b3eb9dc, + 0x2ab5504448a49ce3, + // 2^36 * 4 * B + 0x2ed227266f0f5dec, + 0x9824ee415ed50824, + 0x807bec7c9468d415, + 0x7093bae1b521e23f, + 0xdc07ac631c5d3afa, + 0x58615171f9df8c6c, + 0x72a079d89d73e2b0, + 0x7301f4ceb4eae15d, + 0x6409e759d6722c41, + 0xa674e1cf72bf729b, + 0xbc0a24eb3c21e569, + 0x390167d24ebacb23, + // 2^36 * 5 * B + 0x27f58e3bba353f1c, + 0x4c47764dbf6a4361, + 0xafbbc4e56e562650, + 0x07db2ee6aae1a45d, + 0xd7bb054ba2f2120b, + 0xe2b9ceaeb10589b7, + 0x3fe8bac8f3c0edbe, + 0x4cbd40767112cb69, + 0x0b603cc029c58176, + 0x5988e3825cb15d61, + 0x2bb61413dcf0ad8d, + 0x7b8eec6c74183287, + // 2^36 * 6 * B + 0xe4ca40782cd27cb0, + 0xdaf9c323fbe967bd, + 0xb29bd34a8ad41e9e, + 0x72810497626ede4d, + 0x32fee570fc386b73, + 0xda8b0141da3a8cc7, + 0x975ffd0ac8968359, + 0x6ee809a1b132a855, + 0x9444bb31fcfd863a, + 0x2fe3690a3e4e48c5, + 0xdc29c867d088fa25, + 0x13bd1e38d173292e, + // 2^36 * 7 * B + 0xd32b4cd8696149b5, + 0xe55937d781d8aab7, + 0x0bcb2127ae122b94, + 0x41e86fcfb14099b0, + 0x223fb5cf1dfac521, + 0x325c25316f554450, + 0x030b98d7659177ac, + 0x1ed018b64f88a4bd, + 0x3630dfa1b802a6b0, + 0x880f874742ad3bd5, + 0x0af90d6ceec5a4d4, + 0x746a247a37cdc5d9, + // 2^36 * 8 * B + 0xd531b8bd2b7b9af6, + 0x5005093537fc5b51, + 0x232fcf25c593546d, + 0x20a365142bb40f49, + 0x6eccd85278d941ed, + 0x2254ae83d22f7843, + 0xc522d02e7bbfcdb7, + 0x681e3351bff0e4e2, + 0x8b64b59d83034f45, + 0x2f8b71f21fa20efb, + 0x69249495ba6550e4, + 0x539ef98e45d5472b, + // 2^40 * 1 * B + 0x6e7bb6a1a6205275, + 0xaa4f21d7413c8e83, + 0x6f56d155e88f5cb2, + 0x2de25d4ba6345be1, + 0xd074d8961cae743f, + 0xf86d18f5ee1c63ed, + 0x97bdc55be7f4ed29, + 0x4cbad279663ab108, + 0x80d19024a0d71fcd, + 
0xc525c20afb288af8, + 0xb1a3974b5f3a6419, + 0x7d7fbcefe2007233, + // 2^40 * 2 * B + 0xfaef1e6a266b2801, + 0x866c68c4d5739f16, + 0xf68a2fbc1b03762c, + 0x5975435e87b75a8d, + 0xcd7c5dc5f3c29094, + 0xc781a29a2a9105ab, + 0x80c61d36421c3058, + 0x4f9cd196dcd8d4d7, + 0x199297d86a7b3768, + 0xd0d058241ad17a63, + 0xba029cad5c1c0c17, + 0x7ccdd084387a0307, + // 2^40 * 3 * B + 0xdca6422c6d260417, + 0xae153d50948240bd, + 0xa9c0c1b4fb68c677, + 0x428bd0ed61d0cf53, + 0x9b0c84186760cc93, + 0xcdae007a1ab32a99, + 0xa88dec86620bda18, + 0x3593ca848190ca44, + 0x9213189a5e849aa7, + 0xd4d8c33565d8facd, + 0x8c52545b53fdbbd1, + 0x27398308da2d63e6, + // 2^40 * 4 * B + 0x42c38d28435ed413, + 0xbd50f3603278ccc9, + 0xbb07ab1a79da03ef, + 0x269597aebe8c3355, + 0xb9a10e4c0a702453, + 0x0fa25866d57d1bde, + 0xffb9d9b5cd27daf7, + 0x572c2945492c33fd, + 0xc77fc745d6cd30be, + 0xe4dfe8d3e3baaefb, + 0xa22c8830aa5dda0c, + 0x7f985498c05bca80, + // 2^40 * 5 * B + 0x3849ce889f0be117, + 0x8005ad1b7b54a288, + 0x3da3c39f23fc921c, + 0x76c2ec470a31f304, + 0xd35615520fbf6363, + 0x08045a45cf4dfba6, + 0xeec24fbc873fa0c2, + 0x30f2653cd69b12e7, + 0x8a08c938aac10c85, + 0x46179b60db276bcb, + 0xa920c01e0e6fac70, + 0x2f1273f1596473da, + // 2^40 * 6 * B + 0x4739fc7c8ae01e11, + 0xfd5274904a6aab9f, + 0x41d98a8287728f2e, + 0x5d9e572ad85b69f2, + 0x30488bd755a70bc0, + 0x06d6b5a4f1d442e7, + 0xead1a69ebc596162, + 0x38ac1997edc5f784, + 0x0666b517a751b13b, + 0x747d06867e9b858c, + 0xacacc011454dde49, + 0x22dfcd9cbfe9e69c, + // 2^40 * 7 * B + 0x8ddbd2e0c30d0cd9, + 0xad8e665facbb4333, + 0x8f6b258c322a961f, + 0x6b2916c05448c1c7, + 0x56ec59b4103be0a1, + 0x2ee3baecd259f969, + 0x797cb29413f5cd32, + 0x0fe9877824cde472, + 0x7edb34d10aba913b, + 0x4ea3cd822e6dac0e, + 0x66083dff6578f815, + 0x4c303f307ff00a17, + // 2^40 * 8 * B + 0xd30a3bd617b28c85, + 0xc5d377b739773bea, + 0xc6c6e78c1e6a5cbf, + 0x0d61b8f78b2ab7c4, + 0x29fc03580dd94500, + 0xecd27aa46fbbec93, + 0x130a155fc2e2a7f8, + 0x416b151ab706a1d5, + 0x56a8d7efe9c136b0, + 0xbd07e5cd58e44b20, + 
0xafe62fda1b57e0ab, + 0x191a2af74277e8d2, + // 2^44 * 1 * B + 0xd550095bab6f4985, + 0x04f4cd5b4fbfaf1a, + 0x9d8e2ed12a0c7540, + 0x2bc24e04b2212286, + 0x09d4b60b2fe09a14, + 0xc384f0afdbb1747e, + 0x58e2ea8978b5fd6e, + 0x519ef577b5e09b0a, + 0x1863d7d91124cca9, + 0x7ac08145b88a708e, + 0x2bcd7309857031f5, + 0x62337a6e8ab8fae5, + // 2^44 * 2 * B + 0x4bcef17f06ffca16, + 0xde06e1db692ae16a, + 0x0753702d614f42b0, + 0x5f6041b45b9212d0, + 0xd1ab324e1b3a1273, + 0x18947cf181055340, + 0x3b5d9567a98c196e, + 0x7fa00425802e1e68, + 0x7d531574028c2705, + 0x80317d69db0d75fe, + 0x30fface8ef8c8ddd, + 0x7e9de97bb6c3e998, + // 2^44 * 3 * B + 0x1558967b9e6585a3, + 0x97c99ce098e98b92, + 0x10af149b6eb3adad, + 0x42181fe8f4d38cfa, + 0xf004be62a24d40dd, + 0xba0659910452d41f, + 0x81c45ee162a44234, + 0x4cb829d8a22266ef, + 0x1dbcaa8407b86681, + 0x081f001e8b26753b, + 0x3cd7ce6a84048e81, + 0x78af11633f25f22c, + // 2^44 * 4 * B + 0x8416ebd40b50babc, + 0x1508722628208bee, + 0xa3148fafb9c1c36d, + 0x0d07daacd32d7d5d, + 0x3241c00e7d65318c, + 0xe6bee5dcd0e86de7, + 0x118b2dc2fbc08c26, + 0x680d04a7fc603dc3, + 0xf9c2414a695aa3eb, + 0xdaa42c4c05a68f21, + 0x7c6c23987f93963e, + 0x210e8cd30c3954e3, + // 2^44 * 5 * B + 0xac4201f210a71c06, + 0x6a65e0aef3bfb021, + 0xbc42c35c393632f7, + 0x56ea8db1865f0742, + 0x2b50f16137fe6c26, + 0xe102bcd856e404d8, + 0x12b0f1414c561f6b, + 0x51b17bc8d028ec91, + 0xfff5fb4bcf535119, + 0xf4989d79df1108a0, + 0xbdfcea659a3ba325, + 0x18a11f1174d1a6f2, + // 2^44 * 6 * B + 0x407375ab3f6bba29, + 0x9ec3b6d8991e482e, + 0x99c80e82e55f92e9, + 0x307c13b6fb0c0ae1, + 0xfbd63cdad27a5f2c, + 0xf00fc4bc8aa106d7, + 0x53fb5c1a8e64a430, + 0x04eaabe50c1a2e85, + 0x24751021cb8ab5e7, + 0xfc2344495c5010eb, + 0x5f1e717b4e5610a1, + 0x44da5f18c2710cd5, + // 2^44 * 7 * B + 0x033cc55ff1b82eb5, + 0xb15ae36d411cae52, + 0xba40b6198ffbacd3, + 0x768edce1532e861f, + 0x9156fe6b89d8eacc, + 0xe6b79451e23126a1, + 0xbd7463d93944eb4e, + 0x726373f6767203ae, + 0xe305ca72eb7ef68a, + 0x662cf31f70eadb23, + 0x18f026fdb4c45b68, + 
0x513b5384b5d2ecbd, + // 2^44 * 8 * B + 0x46d46280c729989e, + 0x4b93fbd05368a5dd, + 0x63df3f81d1765a89, + 0x34cebd64b9a0a223, + 0x5e2702878af34ceb, + 0x900b0409b946d6ae, + 0x6512ebf7dabd8512, + 0x61d9b76988258f81, + 0xa6c5a71349b7d94b, + 0xa3f3d15823eb9446, + 0x0416fbd277484834, + 0x69d45e6f2c70812f, + // 2^48 * 1 * B + 0xce16f74bc53c1431, + 0x2b9725ce2072edde, + 0xb8b9c36fb5b23ee7, + 0x7e2e0e450b5cc908, + 0x9fe62b434f460efb, + 0xded303d4a63607d6, + 0xf052210eb7a0da24, + 0x237e7dbe00545b93, + 0x013575ed6701b430, + 0x231094e69f0bfd10, + 0x75320f1583e47f22, + 0x71afa699b11155e3, + // 2^48 * 2 * B + 0x65ce6f9b3953b61d, + 0xc65839eaafa141e6, + 0x0f435ffda9f759fe, + 0x021142e9c2b1c28e, + 0xea423c1c473b50d6, + 0x51e87a1f3b38ef10, + 0x9b84bf5fb2c9be95, + 0x00731fbc78f89a1c, + 0xe430c71848f81880, + 0xbf960c225ecec119, + 0xb6dae0836bba15e3, + 0x4c4d6f3347e15808, + // 2^48 * 3 * B + 0x18f7eccfc17d1fc9, + 0x6c75f5a651403c14, + 0xdbde712bf7ee0cdf, + 0x193fddaaa7e47a22, + 0x2f0cddfc988f1970, + 0x6b916227b0b9f51b, + 0x6ec7b6c4779176be, + 0x38bf9500a88f9fa8, + 0x1fd2c93c37e8876f, + 0xa2f61e5a18d1462c, + 0x5080f58239241276, + 0x6a6fb99ebf0d4969, + // 2^48 * 4 * B + 0x6a46c1bb560855eb, + 0x2416bb38f893f09d, + 0xd71d11378f71acc1, + 0x75f76914a31896ea, + 0xeeb122b5b6e423c6, + 0x939d7010f286ff8e, + 0x90a92a831dcf5d8c, + 0x136fda9f42c5eb10, + 0xf94cdfb1a305bdd1, + 0x0f364b9d9ff82c08, + 0x2a87d8a5c3bb588a, + 0x022183510be8dcba, + // 2^48 * 5 * B + 0x4af766385ead2d14, + 0xa08ed880ca7c5830, + 0x0d13a6e610211e3d, + 0x6a071ce17b806c03, + 0x9d5a710143307a7f, + 0xb063de9ec47da45f, + 0x22bbfe52be927ad3, + 0x1387c441fd40426c, + 0xb5d3c3d187978af8, + 0x722b5a3d7f0e4413, + 0x0d7b4848bb477ca0, + 0x3171b26aaf1edc92, + // 2^48 * 6 * B + 0xa92f319097564ca8, + 0xff7bb84c2275e119, + 0x4f55fe37a4875150, + 0x221fd4873cf0835a, + 0xa60db7d8b28a47d1, + 0xa6bf14d61770a4f1, + 0xd4a1f89353ddbd58, + 0x6c514a63344243e9, + 0x2322204f3a156341, + 0xfb73e0e9ba0a032d, + 0xfce0dd4c410f030e, + 0x48daa596fb924aaa, + // 
2^48 * 7 * B + 0x6eca8e665ca59cc7, + 0xa847254b2e38aca0, + 0x31afc708d21e17ce, + 0x676dd6fccad84af7, + 0x14f61d5dc84c9793, + 0x9941f9e3ef418206, + 0xcdf5b88f346277ac, + 0x58c837fa0e8a79a9, + 0x0cf9688596fc9058, + 0x1ddcbbf37b56a01b, + 0xdcc2e77d4935d66a, + 0x1c4f73f2c6a57f0a, + // 2^48 * 8 * B + 0x0e7a4fbd305fa0bb, + 0x829d4ce054c663ad, + 0xf421c3832fe33848, + 0x795ac80d1bf64c42, + 0xb36e706efc7c3484, + 0x73dfc9b4c3c1cf61, + 0xeb1d79c9781cc7e5, + 0x70459adb7daf675c, + 0x1b91db4991b42bb3, + 0x572696234b02dcca, + 0x9fdf9ee51f8c78dc, + 0x5fe162848ce21fd3, + // 2^52 * 1 * B + 0xe2790aae4d077c41, + 0x8b938270db7469a3, + 0x6eb632dc8abd16a2, + 0x720814ecaa064b72, + 0x315c29c795115389, + 0xd7e0e507862f74ce, + 0x0c4a762185927432, + 0x72de6c984a25a1e4, + 0xae9ab553bf6aa310, + 0x050a50a9806d6e1b, + 0x92bb7403adff5139, + 0x0394d27645be618b, + // 2^52 * 2 * B + 0x4d572251857eedf4, + 0xe3724edde19e93c5, + 0x8a71420e0b797035, + 0x3b3c833687abe743, + 0xf5396425b23545a4, + 0x15a7a27e98fbb296, + 0xab6c52bc636fdd86, + 0x79d995a8419334ee, + 0xcd8a8ea61195dd75, + 0xa504d8a81dd9a82f, + 0x540dca81a35879b6, + 0x60dd16a379c86a8a, + // 2^52 * 3 * B + 0x35a2c8487381e559, + 0x596ffea6d78082cb, + 0xcb9771ebdba7b653, + 0x5a08b5019b4da685, + 0x3501d6f8153e47b8, + 0xb7a9675414a2f60c, + 0x112ee8b6455d9523, + 0x4e62a3c18112ea8a, + 0xc8d4ac04516ab786, + 0x595af3215295b23d, + 0xd6edd234db0230c1, + 0x0929efe8825b41cc, + // 2^52 * 4 * B + 0x5f0601d1cbd0f2d3, + 0x736e412f6132bb7f, + 0x83604432238dde87, + 0x1e3a5272f5c0753c, + 0x8b3172b7ad56651d, + 0x01581b7a3fabd717, + 0x2dc94df6424df6e4, + 0x30376e5d2c29284f, + 0xd2918da78159a59c, + 0x6bdc1cd93f0713f3, + 0x565f7a934acd6590, + 0x53daacec4cb4c128, + // 2^52 * 5 * B + 0x4ca73bd79cc8a7d6, + 0x4d4a738f47e9a9b2, + 0xf4cbf12942f5fe00, + 0x01a13ff9bdbf0752, + 0x99852bc3852cfdb0, + 0x2cc12e9559d6ed0b, + 0x70f9e2bf9b5ac27b, + 0x4f3b8c117959ae99, + 0x55b6c9c82ff26412, + 0x1ac4a8c91fb667a8, + 0xd527bfcfeb778bf2, + 0x303337da7012a3be, + // 2^52 * 6 * B + 
0x955422228c1c9d7c, + 0x01fac1371a9b340f, + 0x7e8d9177925b48d7, + 0x53f8ad5661b3e31b, + 0x976d3ccbfad2fdd1, + 0xcb88839737a640a8, + 0x2ff00c1d6734cb25, + 0x269ff4dc789c2d2b, + 0x0c003fbdc08d678d, + 0x4d982fa37ead2b17, + 0xc07e6bcdb2e582f1, + 0x296c7291df412a44, + // 2^52 * 7 * B + 0x7903de2b33daf397, + 0xd0ff0619c9a624b3, + 0x8a1d252b555b3e18, + 0x2b6d581c52e0b7c0, + 0xdfb23205dab8b59e, + 0x465aeaa0c8092250, + 0xd133c1189a725d18, + 0x2327370261f117d1, + 0x3d0543d3623e7986, + 0x679414c2c278a354, + 0xae43f0cc726196f6, + 0x7836c41f8245eaba, + // 2^52 * 8 * B + 0xe7a254db49e95a81, + 0x5192d5d008b0ad73, + 0x4d20e5b1d00afc07, + 0x5d55f8012cf25f38, + 0xca651e848011937c, + 0xc6b0c46e6ef41a28, + 0xb7021ba75f3f8d52, + 0x119dff99ead7b9fd, + 0x43eadfcbf4b31d4d, + 0xc6503f7411148892, + 0xfeee68c5060d3b17, + 0x329293b3dd4a0ac8, + // 2^56 * 1 * B + 0x4e59214fe194961a, + 0x49be7dc70d71cd4f, + 0x9300cfd23b50f22d, + 0x4789d446fc917232, + 0x2879852d5d7cb208, + 0xb8dedd70687df2e7, + 0xdc0bffab21687891, + 0x2b44c043677daa35, + 0x1a1c87ab074eb78e, + 0xfac6d18e99daf467, + 0x3eacbbcd484f9067, + 0x60c52eef2bb9a4e4, + // 2^56 * 2 * B + 0x0b5d89bc3bfd8bf1, + 0xb06b9237c9f3551a, + 0x0e4c16b0d53028f5, + 0x10bc9c312ccfcaab, + 0x702bc5c27cae6d11, + 0x44c7699b54a48cab, + 0xefbc4056ba492eb2, + 0x70d77248d9b6676d, + 0xaa8ae84b3ec2a05b, + 0x98699ef4ed1781e0, + 0x794513e4708e85d1, + 0x63755bd3a976f413, + // 2^56 * 3 * B + 0xb55fa03e2ad10853, + 0x356f75909ee63569, + 0x9ff9f1fdbe69b890, + 0x0d8cc1c48bc16f84, + 0x3dc7101897f1acb7, + 0x5dda7d5ec165bbd8, + 0x508e5b9c0fa1020f, + 0x2763751737c52a56, + 0x029402d36eb419a9, + 0xf0b44e7e77b460a5, + 0xcfa86230d43c4956, + 0x70c2dd8a7ad166e7, + // 2^56 * 4 * B + 0x656194509f6fec0e, + 0xee2e7ea946c6518d, + 0x9733c1f367e09b5c, + 0x2e0fac6363948495, + 0x91d4967db8ed7e13, + 0x74252f0ad776817a, + 0xe40982e00d852564, + 0x32b8613816a53ce5, + 0x79e7f7bee448cd64, + 0x6ac83a67087886d0, + 0xf89fd4d9a0e4db2e, + 0x4179215c735a4f41, + // 2^56 * 5 * B + 0x8c7094e7d7dced2a, + 
0x97fb8ac347d39c70, + 0xe13be033a906d902, + 0x700344a30cd99d76, + 0xe4ae33b9286bcd34, + 0xb7ef7eb6559dd6dc, + 0x278b141fb3d38e1f, + 0x31fa85662241c286, + 0xaf826c422e3622f4, + 0xc12029879833502d, + 0x9bc1b7e12b389123, + 0x24bb2312a9952489, + // 2^56 * 6 * B + 0xb1a8ed1732de67c3, + 0x3cb49418461b4948, + 0x8ebd434376cfbcd2, + 0x0fee3e871e188008, + 0x41f80c2af5f85c6b, + 0x687284c304fa6794, + 0x8945df99a3ba1bad, + 0x0d1d2af9ffeb5d16, + 0xa9da8aa132621edf, + 0x30b822a159226579, + 0x4004197ba79ac193, + 0x16acd79718531d76, + // 2^56 * 7 * B + 0x72df72af2d9b1d3d, + 0x63462a36a432245a, + 0x3ecea07916b39637, + 0x123e0ef6b9302309, + 0xc959c6c57887b6ad, + 0x94e19ead5f90feba, + 0x16e24e62a342f504, + 0x164ed34b18161700, + 0x487ed94c192fe69a, + 0x61ae2cea3a911513, + 0x877bf6d3b9a4de27, + 0x78da0fc61073f3eb, + // 2^56 * 8 * B + 0x5bf15d28e52bc66a, + 0x2c47e31870f01a8e, + 0x2419afbc06c28bdd, + 0x2d25deeb256b173a, + 0xa29f80f1680c3a94, + 0x71f77e151ae9e7e6, + 0x1100f15848017973, + 0x054aa4b316b38ddd, + 0xdfc8468d19267cb8, + 0x0b28789c66e54daf, + 0x2aeb1d2a666eec17, + 0x134610a6ab7da760, + // 2^60 * 1 * B + 0xcaf55ec27c59b23f, + 0x99aeed3e154d04f2, + 0x68441d72e14141f4, + 0x140345133932a0a2, + 0xd91430e0dc028c3c, + 0x0eb955a85217c771, + 0x4b09e1ed2c99a1fa, + 0x42881af2bd6a743c, + 0x7bfec69aab5cad3d, + 0xc23e8cd34cb2cfad, + 0x685dd14bfb37d6a2, + 0x0ad6d64415677a18, + // 2^60 * 2 * B + 0x781a439e417becb5, + 0x4ac5938cd10e0266, + 0x5da385110692ac24, + 0x11b065a2ade31233, + 0x7914892847927e9f, + 0x33dad6ef370aa877, + 0x1f8f24fa11122703, + 0x5265ac2f2adf9592, + 0x405fdd309afcb346, + 0xd9723d4428e63f54, + 0x94c01df05f65aaae, + 0x43e4dc3ae14c0809, + // 2^60 * 3 * B + 0xbc12c7f1a938a517, + 0x473028ab3180b2e1, + 0x3f78571efbcd254a, + 0x74e534426ff6f90f, + 0xea6f7ac3adc2c6a3, + 0xd0e928f6e9717c94, + 0xe2d379ead645eaf5, + 0x46dd8785c51ffbbe, + 0x709801be375c8898, + 0x4b06dab5e3fd8348, + 0x75880ced27230714, + 0x2b09468fdd2f4c42, + // 2^60 * 4 * B + 0x97c749eeb701cb96, + 0x83f438d4b6a369c3, + 
0x62962b8b9a402cd9, + 0x6976c7509888df7b, + 0x5b97946582ffa02a, + 0xda096a51fea8f549, + 0xa06351375f77af9b, + 0x1bcfde61201d1e76, + 0x4a4a5490246a59a2, + 0xd63ebddee87fdd90, + 0xd9437c670d2371fa, + 0x69e87308d30f8ed6, + // 2^60 * 5 * B + 0x435a8bb15656beb0, + 0xf8fac9ba4f4d5bca, + 0xb9b278c41548c075, + 0x3eb0ef76e892b622, + 0x0f80bf028bc80303, + 0x6aae16b37a18cefb, + 0xdd47ea47d72cd6a3, + 0x61943588f4ed39aa, + 0xd26e5c3e91039f85, + 0xc0e9e77df6f33aa9, + 0xe8968c5570066a93, + 0x3c34d1881faaaddd, + // 2^60 * 6 * B + 0x3f9d2b5ea09f9ec0, + 0x1dab3b6fb623a890, + 0xa09ba3ea72d926c4, + 0x374193513fd8b36d, + 0xbd5b0b8f2fffe0d9, + 0x6aa254103ed24fb9, + 0x2ac7d7bcb26821c4, + 0x605b394b60dca36a, + 0xb4e856e45a9d1ed2, + 0xefe848766c97a9a2, + 0xb104cf641e5eee7d, + 0x2f50b81c88a71c8f, + // 2^60 * 7 * B + 0x31723c61fc6811bb, + 0x9cb450486211800f, + 0x768933d347995753, + 0x3491a53502752fcd, + 0x2b552ca0a7da522a, + 0x3230b336449b0250, + 0xf2c4c5bca4b99fb9, + 0x7b2c674958074a22, + 0xd55165883ed28cdf, + 0x12d84fd2d362de39, + 0x0a874ad3e3378e4f, + 0x000d2b1f7c763e74, + // 2^60 * 8 * B + 0x3d420811d06d4a67, + 0xbefc048590e0ffe3, + 0xf870c6b7bd487bde, + 0x6e2a7316319afa28, + 0x9624778c3e94a8ab, + 0x0ad6f3cee9a78bec, + 0x948ac7810d743c4f, + 0x76627935aaecfccc, + 0x56a8ac24d6d59a9f, + 0xc8db753e3096f006, + 0x477f41e68f4c5299, + 0x588d851cf6c86114, + // 2^64 * 1 * B + 0x51138ec78df6b0fe, + 0x5397da89e575f51b, + 0x09207a1d717af1b9, + 0x2102fdba2b20d650, + 0xcd2a65e777d1f515, + 0x548991878faa60f1, + 0xb1b73bbcdabc06e5, + 0x654878cba97cc9fb, + 0x969ee405055ce6a1, + 0x36bca7681251ad29, + 0x3a1af517aa7da415, + 0x0ad725db29ecb2ba, + // 2^64 * 2 * B + 0xdc4267b1834e2457, + 0xb67544b570ce1bc5, + 0x1af07a0bf7d15ed7, + 0x4aefcffb71a03650, + 0xfec7bc0c9b056f85, + 0x537d5268e7f5ffd7, + 0x77afc6624312aefa, + 0x4f675f5302399fd9, + 0xc32d36360415171e, + 0xcd2bef118998483b, + 0x870a6eadd0945110, + 0x0bccbb72a2a86561, + // 2^64 * 3 * B + 0x185e962feab1a9c8, + 0x86e7e63565147dcd, + 0xb092e031bb5b6df2, + 
0x4024f0ab59d6b73e, + 0x186d5e4c50fe1296, + 0xe0397b82fee89f7e, + 0x3bc7f6c5507031b0, + 0x6678fd69108f37c2, + 0x1586fa31636863c2, + 0x07f68c48572d33f2, + 0x4f73cc9f789eaefc, + 0x2d42e2108ead4701, + // 2^64 * 4 * B + 0x97f5131594dfd29b, + 0x6155985d313f4c6a, + 0xeba13f0708455010, + 0x676b2608b8d2d322, + 0x21717b0d0f537593, + 0x914e690b131e064c, + 0x1bb687ae752ae09f, + 0x420bf3a79b423c6e, + 0x8138ba651c5b2b47, + 0x8671b6ec311b1b80, + 0x7bff0cb1bc3135b0, + 0x745d2ffa9c0cf1e0, + // 2^64 * 5 * B + 0xbf525a1e2bc9c8bd, + 0xea5b260826479d81, + 0xd511c70edf0155db, + 0x1ae23ceb960cf5d0, + 0x6036df5721d34e6a, + 0xb1db8827997bb3d0, + 0xd3c209c3c8756afa, + 0x06e15be54c1dc839, + 0x5b725d871932994a, + 0x32351cb5ceb1dab0, + 0x7dc41549dab7ca05, + 0x58ded861278ec1f7, + // 2^64 * 6 * B + 0xd8173793f266c55c, + 0xc8c976c5cc454e49, + 0x5ce382f8bc26c3a8, + 0x2ff39de85485f6f9, + 0x2dfb5ba8b6c2c9a8, + 0x48eeef8ef52c598c, + 0x33809107f12d1573, + 0x08ba696b531d5bd8, + 0x77ed3eeec3efc57a, + 0x04e05517d4ff4811, + 0xea3d7a3ff1a671cb, + 0x120633b4947cfe54, + // 2^64 * 7 * B + 0x0b94987891610042, + 0x4ee7b13cecebfae8, + 0x70be739594f0a4c0, + 0x35d30a99b4d59185, + 0x82bd31474912100a, + 0xde237b6d7e6fbe06, + 0xe11e761911ea79c6, + 0x07433be3cb393bde, + 0xff7944c05ce997f4, + 0x575d3de4b05c51a3, + 0x583381fd5a76847c, + 0x2d873ede7af6da9f, + // 2^64 * 8 * B + 0x157a316443373409, + 0xfab8b7eef4aa81d9, + 0xb093fee6f5a64806, + 0x2e773654707fa7b6, + 0xaa6202e14e5df981, + 0xa20d59175015e1f5, + 0x18a275d3bae21d6c, + 0x0543618a01600253, + 0x0deabdf4974c23c1, + 0xaa6f0a259dce4693, + 0x04202cb8a29aba2c, + 0x4b1443362d07960d, + // 2^68 * 1 * B + 0x47b837f753242cec, + 0x256dc48cc04212f2, + 0xe222fbfbe1d928c5, + 0x48ea295bad8a2c07, + 0x299b1c3f57c5715e, + 0x96cb929e6b686d90, + 0x3004806447235ab3, + 0x2c435c24a44d9fe1, + 0x0607c97c80f8833f, + 0x0e851578ca25ec5b, + 0x54f7450b161ebb6f, + 0x7bcb4792a0def80e, + // 2^68 * 2 * B + 0x8487e3d02bc73659, + 0x4baf8445059979df, + 0xd17c975adcad6fbf, + 0x57369f0bdefc96b6, + 
0x1cecd0a0045224c2, + 0x757f1b1b69e53952, + 0x775b7a925289f681, + 0x1b6cc62016736148, + 0xf1a9990175638698, + 0x353dd1beeeaa60d3, + 0x849471334c9ba488, + 0x63fa6e6843ade311, + // 2^68 * 3 * B + 0xd15c20536597c168, + 0x9f73740098d28789, + 0x18aee7f13257ba1f, + 0x3418bfda07346f14, + 0x2195becdd24b5eb7, + 0x5e41f18cc0cd44f9, + 0xdf28074441ca9ede, + 0x07073b98f35b7d67, + 0xd03c676c4ce530d4, + 0x0b64c0473b5df9f4, + 0x065cef8b19b3a31e, + 0x3084d661533102c9, + // 2^68 * 4 * B + 0xe1f6b79ebf8469ad, + 0x15801004e2663135, + 0x9a498330af74181b, + 0x3ba2504f049b673c, + 0x9a6ce876760321fd, + 0x7fe2b5109eb63ad8, + 0x00e7d4ae8ac80592, + 0x73d86b7abb6f723a, + 0x0b52b5606dba5ab6, + 0xa9134f0fbbb1edab, + 0x30a9520d9b04a635, + 0x6813b8f37973e5db, + // 2^68 * 5 * B + 0x9854b054334127c1, + 0x105d047882fbff25, + 0xdb49f7f944186f4f, + 0x1768e838bed0b900, + 0xf194ca56f3157e29, + 0x136d35705ef528a5, + 0xdd4cef778b0599bc, + 0x7d5472af24f833ed, + 0xd0ef874daf33da47, + 0x00d3be5db6e339f9, + 0x3f2a8a2f9c9ceece, + 0x5d1aeb792352435a, + // 2^68 * 6 * B + 0xf59e6bb319cd63ca, + 0x670c159221d06839, + 0xb06d565b2150cab6, + 0x20fb199d104f12a3, + 0x12c7bfaeb61ba775, + 0xb84e621fe263bffd, + 0x0b47a5c35c840dcf, + 0x7e83be0bccaf8634, + 0x61943dee6d99c120, + 0x86101f2e460b9fe0, + 0x6bb2f1518ee8598d, + 0x76b76289fcc475cc, + // 2^68 * 7 * B + 0x791b4cc1756286fa, + 0xdbced317d74a157c, + 0x7e732421ea72bde6, + 0x01fe18491131c8e9, + 0x4245f1a1522ec0b3, + 0x558785b22a75656d, + 0x1d485a2548a1b3c0, + 0x60959eccd58fe09f, + 0x3ebfeb7ba8ed7a09, + 0x49fdc2bbe502789c, + 0x44ebce5d3c119428, + 0x35e1eb55be947f4a, + // 2^68 * 8 * B + 0xdbdae701c5738dd3, + 0xf9c6f635b26f1bee, + 0x61e96a8042f15ef4, + 0x3aa1d11faf60a4d8, + 0x14fd6dfa726ccc74, + 0x3b084cfe2f53b965, + 0xf33ae4f552a2c8b4, + 0x59aab07a0d40166a, + 0x77bcec4c925eac25, + 0x1848718460137738, + 0x5b374337fea9f451, + 0x1865e78ec8e6aa46, + // 2^72 * 1 * B + 0xccc4b7c7b66e1f7a, + 0x44157e25f50c2f7e, + 0x3ef06dfc713eaf1c, + 0x582f446752da63f7, + 0x967c54e91c529ccb, + 
0x30f6269264c635fb, + 0x2747aff478121965, + 0x17038418eaf66f5c, + 0xc6317bd320324ce4, + 0xa81042e8a4488bc4, + 0xb21ef18b4e5a1364, + 0x0c2a1c4bcda28dc9, + // 2^72 * 2 * B + 0xd24dc7d06f1f0447, + 0xb2269e3edb87c059, + 0xd15b0272fbb2d28f, + 0x7c558bd1c6f64877, + 0xedc4814869bd6945, + 0x0d6d907dbe1c8d22, + 0xc63bd212d55cc5ab, + 0x5a6a9b30a314dc83, + 0xd0ec1524d396463d, + 0x12bb628ac35a24f0, + 0xa50c3a791cbc5fa4, + 0x0404a5ca0afbafc3, + // 2^72 * 3 * B + 0x8c1f40070aa743d6, + 0xccbad0cb5b265ee8, + 0x574b046b668fd2de, + 0x46395bfdcadd9633, + 0x62bc9e1b2a416fd1, + 0xb5c6f728e350598b, + 0x04343fd83d5d6967, + 0x39527516e7f8ee98, + 0x117fdb2d1a5d9a9c, + 0x9c7745bcd1005c2a, + 0xefd4bef154d56fea, + 0x76579a29e822d016, + // 2^72 * 4 * B + 0x45b68e7e49c02a17, + 0x23cd51a2bca9a37f, + 0x3ed65f11ec224c1b, + 0x43a384dc9e05bdb1, + 0x333cb51352b434f2, + 0xd832284993de80e1, + 0xb5512887750d35ce, + 0x02c514bb2a2777c1, + 0x684bd5da8bf1b645, + 0xfb8bd37ef6b54b53, + 0x313916d7a9b0d253, + 0x1160920961548059, + // 2^72 * 5 * B + 0xb44d166929dacfaa, + 0xda529f4c8413598f, + 0xe9ef63ca453d5559, + 0x351e125bc5698e0b, + 0x7a385616369b4dcd, + 0x75c02ca7655c3563, + 0x7dc21bf9d4f18021, + 0x2f637d7491e6e042, + 0xd4b49b461af67bbe, + 0xd603037ac8ab8961, + 0x71dee19ff9a699fb, + 0x7f182d06e7ce2a9a, + // 2^72 * 6 * B + 0x7a7c8e64ab0168ec, + 0xcb5a4a5515edc543, + 0x095519d347cd0eda, + 0x67d4ac8c343e93b0, + 0x09454b728e217522, + 0xaa58e8f4d484b8d8, + 0xd358254d7f46903c, + 0x44acc043241c5217, + 0x1c7d6bbb4f7a5777, + 0x8b35fed4918313e1, + 0x4adca1c6c96b4684, + 0x556d1c8312ad71bd, + // 2^72 * 7 * B + 0x17ef40e30c8d3982, + 0x31f7073e15a3fa34, + 0x4f21f3cb0773646e, + 0x746c6c6d1d824eff, + 0x81f06756b11be821, + 0x0faff82310a3f3dd, + 0xf8b2d0556a99465d, + 0x097abe38cc8c7f05, + 0x0c49c9877ea52da4, + 0x4c4369559bdc1d43, + 0x022c3809f7ccebd2, + 0x577e14a34bee84bd, + // 2^72 * 8 * B + 0xf0e268ac61a73b0a, + 0xf2fafa103791a5f5, + 0xc1e13e826b6d00e9, + 0x60fa7ee96fd78f42, + 0x94fecebebd4dd72b, + 0xf46a4fda060f2211, + 
0x124a5977c0c8d1ff, + 0x705304b8fb009295, + 0xb63d1d354d296ec6, + 0xf3c3053e5fad31d8, + 0x670b958cb4bd42ec, + 0x21398e0ca16353fd, + // 2^76 * 1 * B + 0x216ab2ca8da7d2ef, + 0x366ad9dd99f42827, + 0xae64b9004fdd3c75, + 0x403a395b53909e62, + 0x86c5fc16861b7e9a, + 0xf6a330476a27c451, + 0x01667267a1e93597, + 0x05ffb9cd6082dfeb, + 0xa617fa9ff53f6139, + 0x60f2b5e513e66cb6, + 0xd7a8beefb3448aa4, + 0x7a2932856f5ea192, + // 2^76 * 2 * B + 0x0b39d761b02de888, + 0x5f550e7ed2414e1f, + 0xa6bfa45822e1a940, + 0x050a2f7dfd447b99, + 0xb89c444879639302, + 0x4ae4f19350c67f2c, + 0xf0b35da8c81af9c6, + 0x39d0003546871017, + 0x437c3b33a650db77, + 0x6bafe81dbac52bb2, + 0xfe99402d2db7d318, + 0x2b5b7eec372ba6ce, + // 2^76 * 3 * B + 0xb3bc4bbd83f50eef, + 0x508f0c998c927866, + 0x43e76587c8b7e66e, + 0x0f7655a3a47f98d9, + 0xa694404d613ac8f4, + 0x500c3c2bfa97e72c, + 0x874104d21fcec210, + 0x1b205fb38604a8ee, + 0x55ecad37d24b133c, + 0x441e147d6038c90b, + 0x656683a1d62c6fee, + 0x0157d5dc87e0ecae, + // 2^76 * 4 * B + 0xf2a7af510354c13d, + 0xd7a0b145aa372b60, + 0x2869b96a05a3d470, + 0x6528e42d82460173, + 0x95265514d71eb524, + 0xe603d8815df14593, + 0x147cdf410d4de6b7, + 0x5293b1730437c850, + 0x23d0e0814bccf226, + 0x92c745cd8196fb93, + 0x8b61796c59541e5b, + 0x40a44df0c021f978, + // 2^76 * 5 * B + 0xdaa869894f20ea6a, + 0xea14a3d14c620618, + 0x6001fccb090bf8be, + 0x35f4e822947e9cf0, + 0x86c96e514bc5d095, + 0xf20d4098fca6804a, + 0x27363d89c826ea5d, + 0x39ca36565719cacf, + 0x97506f2f6f87b75c, + 0xc624aea0034ae070, + 0x1ec856e3aad34dd6, + 0x055b0be0e440e58f, + // 2^76 * 6 * B + 0x6469a17d89735d12, + 0xdb6f27d5e662b9f1, + 0x9fcba3286a395681, + 0x363b8004d269af25, + 0x4d12a04b6ea33da2, + 0x57cf4c15e36126dd, + 0x90ec9675ee44d967, + 0x64ca348d2a985aac, + 0x99588e19e4c4912d, + 0xefcc3b4e1ca5ce6b, + 0x4522ea60fa5b98d5, + 0x7064bbab1de4a819, + // 2^76 * 7 * B + 0xb919e1515a770641, + 0xa9a2e2c74e7f8039, + 0x7527250b3df23109, + 0x756a7330ac27b78b, + 0xa290c06142542129, + 0xf2e2c2aebe8d5b90, + 0xcf2458db76abfe1b, + 
0x02157ade83d626bf, + 0x3e46972a1b9a038b, + 0x2e4ee66a7ee03fb4, + 0x81a248776edbb4ca, + 0x1a944ee88ecd0563, + // 2^76 * 8 * B + 0xd5a91d1151039372, + 0x2ed377b799ca26de, + 0xa17202acfd366b6b, + 0x0730291bd6901995, + 0xbb40a859182362d6, + 0xb99f55778a4d1abb, + 0x8d18b427758559f6, + 0x26c20fe74d26235a, + 0x648d1d9fe9cc22f5, + 0x66bc561928dd577c, + 0x47d3ed21652439d1, + 0x49d271acedaf8b49, + // 2^80 * 1 * B + 0x89f5058a382b33f3, + 0x5ae2ba0bad48c0b4, + 0x8f93b503a53db36e, + 0x5aa3ed9d95a232e6, + 0x2798aaf9b4b75601, + 0x5eac72135c8dad72, + 0xd2ceaa6161b7a023, + 0x1bbfb284e98f7d4e, + 0x656777e9c7d96561, + 0xcb2b125472c78036, + 0x65053299d9506eee, + 0x4a07e14e5e8957cc, + // 2^80 * 2 * B + 0x4ee412cb980df999, + 0xa315d76f3c6ec771, + 0xbba5edde925c77fd, + 0x3f0bac391d313402, + 0x240b58cdc477a49b, + 0xfd38dade6447f017, + 0x19928d32a7c86aad, + 0x50af7aed84afa081, + 0x6e4fde0115f65be5, + 0x29982621216109b2, + 0x780205810badd6d9, + 0x1921a316baebd006, + // 2^80 * 3 * B + 0x89422f7edfb870fc, + 0x2c296beb4f76b3bd, + 0x0738f1d436c24df7, + 0x6458df41e273aeb0, + 0xd75aad9ad9f3c18b, + 0x566a0eef60b1c19c, + 0x3e9a0bac255c0ed9, + 0x7b049deca062c7f5, + 0xdccbe37a35444483, + 0x758879330fedbe93, + 0x786004c312c5dd87, + 0x6093dccbc2950e64, + // 2^80 * 4 * B + 0x1ff39a8585e0706d, + 0x36d0a5d8b3e73933, + 0x43b9f2e1718f453b, + 0x57d1ea084827a97c, + 0x6bdeeebe6084034b, + 0x3199c2b6780fb854, + 0x973376abb62d0695, + 0x6e3180c98b647d90, + 0xee7ab6e7a128b071, + 0xa4c1596d93a88baa, + 0xf7b4de82b2216130, + 0x363e999ddd97bd18, + // 2^80 * 5 * B + 0x96a843c135ee1fc4, + 0x976eb35508e4c8cf, + 0xb42f6801b58cd330, + 0x48ee9b78693a052b, + 0x2f1848dce24baec6, + 0x769b7255babcaf60, + 0x90cb3c6e3cefe931, + 0x231f979bc6f9b355, + 0x5c31de4bcc2af3c6, + 0xb04bb030fe208d1f, + 0xb78d7009c14fb466, + 0x079bfa9b08792413, + // 2^80 * 6 * B + 0xe3903a51da300df4, + 0x843964233da95ab0, + 0xed3cf12d0b356480, + 0x038c77f684817194, + 0xf3c9ed80a2d54245, + 0x0aa08b7877f63952, + 0xd76dac63d1085475, + 0x1ef4fb159470636b, + 
0x854e5ee65b167bec, + 0x59590a4296d0cdc2, + 0x72b2df3498102199, + 0x575ee92a4a0bff56, + // 2^80 * 7 * B + 0xd4c080908a182fcf, + 0x30e170c299489dbd, + 0x05babd5752f733de, + 0x43d4e7112cd3fd00, + 0x5d46bc450aa4d801, + 0xc3af1227a533b9d8, + 0x389e3b262b8906c2, + 0x200a1e7e382f581b, + 0x518db967eaf93ac5, + 0x71bc989b056652c0, + 0xfe2b85d9567197f5, + 0x050eca52651e4e38, + // 2^80 * 8 * B + 0xc3431ade453f0c9c, + 0xe9f5045eff703b9b, + 0xfcd97ac9ed847b3d, + 0x4b0ee6c21c58f4c6, + 0x97ac397660e668ea, + 0x9b19bbfe153ab497, + 0x4cb179b534eca79f, + 0x6151c09fa131ae57, + 0x3af55c0dfdf05d96, + 0xdd262ee02ab4ee7a, + 0x11b2bb8712171709, + 0x1fef24fa800f030b, + // 2^84 * 1 * B + 0xb496123a6b6c6609, + 0xa750fe8580ab5938, + 0xf471bf39b7c27a5f, + 0x507903ce77ac193c, + 0xff91a66a90166220, + 0xf22552ae5bf1e009, + 0x7dff85d87f90df7c, + 0x4f620ffe0c736fb9, + 0x62f90d65dfde3e34, + 0xcf28c592b9fa5fad, + 0x99c86ef9c6164510, + 0x25d448044a256c84, + // 2^84 * 2 * B + 0xbd68230ec7e9b16f, + 0x0eb1b9c1c1c5795d, + 0x7943c8c495b6b1ff, + 0x2f9faf620bbacf5e, + 0x2c7c4415c9022b55, + 0x56a0d241812eb1fe, + 0xf02ea1c9d7b65e0d, + 0x4180512fd5323b26, + 0xa4ff3e698a48a5db, + 0xba6a3806bd95403b, + 0x9f7ce1af47d5b65d, + 0x15e087e55939d2fb, + // 2^84 * 3 * B + 0x12207543745c1496, + 0xdaff3cfdda38610c, + 0xe4e797272c71c34f, + 0x39c07b1934bdede9, + 0x8894186efb963f38, + 0x48a00e80dc639bd5, + 0xa4e8092be96c1c99, + 0x5a097d54ca573661, + 0x2d45892b17c9e755, + 0xd033fd7289308df8, + 0x6c2fe9d9525b8bd9, + 0x2edbecf1c11cc079, + // 2^84 * 4 * B + 0x1616a4e3c715a0d2, + 0x53623cb0f8341d4d, + 0x96ef5329c7e899cb, + 0x3d4e8dbba668baa6, + 0xee0f0fddd087a25f, + 0x9c7531555c3e34ee, + 0x660c572e8fab3ab5, + 0x0854fc44544cd3b2, + 0x61eba0c555edad19, + 0x24b533fef0a83de6, + 0x3b77042883baa5f8, + 0x678f82b898a47e8d, + // 2^84 * 5 * B + 0xb1491d0bd6900c54, + 0x3539722c9d132636, + 0x4db928920b362bc9, + 0x4d7cd1fea68b69df, + 0x1e09d94057775696, + 0xeed1265c3cd951db, + 0xfa9dac2b20bce16f, + 0x0f7f76e0e8d089f4, + 0x36d9ebc5d485b00c, + 
0xa2596492e4adb365, + 0xc1659480c2119ccd, + 0x45306349186e0d5f, + // 2^84 * 6 * B + 0x94ddd0c1a6cdff1d, + 0x55f6f115e84213ae, + 0x6c935f85992fcf6a, + 0x067ee0f54a37f16f, + 0x96a414ec2b072491, + 0x1bb2218127a7b65b, + 0x6d2849596e8a4af0, + 0x65f3b08ccd27765f, + 0xecb29fff199801f7, + 0x9d361d1fa2a0f72f, + 0x25f11d2375fd2f49, + 0x124cefe80fe10fe2, + // 2^84 * 7 * B + 0x4c126cf9d18df255, + 0xc1d471e9147a63b6, + 0x2c6d3c73f3c93b5f, + 0x6be3a6a2e3ff86a2, + 0x1518e85b31b16489, + 0x8faadcb7db710bfb, + 0x39b0bdf4a14ae239, + 0x05f4cbea503d20c1, + 0xce040e9ec04145bc, + 0xc71ff4e208f6834c, + 0xbd546e8dab8847a3, + 0x64666aa0a4d2aba5, + // 2^84 * 8 * B + 0x6841435a7c06d912, + 0xca123c21bb3f830b, + 0xd4b37b27b1cbe278, + 0x1d753b84c76f5046, + 0xb0c53bf73337e94c, + 0x7cb5697e11e14f15, + 0x4b84abac1930c750, + 0x28dd4abfe0640468, + 0x7dc0b64c44cb9f44, + 0x18a3e1ace3925dbf, + 0x7a3034862d0457c4, + 0x4c498bf78a0c892e, + // 2^88 * 1 * B + 0x37d653fb1aa73196, + 0x0f9495303fd76418, + 0xad200b09fb3a17b2, + 0x544d49292fc8613e, + 0x22d2aff530976b86, + 0x8d90b806c2d24604, + 0xdca1896c4de5bae5, + 0x28005fe6c8340c17, + 0x6aefba9f34528688, + 0x5c1bff9425107da1, + 0xf75bbbcd66d94b36, + 0x72e472930f316dfa, + // 2^88 * 2 * B + 0x2695208c9781084f, + 0xb1502a0b23450ee1, + 0xfd9daea603efde02, + 0x5a9d2e8c2733a34c, + 0x07f3f635d32a7627, + 0x7aaa4d865f6566f0, + 0x3c85e79728d04450, + 0x1fee7f000fe06438, + 0x765305da03dbf7e5, + 0xa4daf2491434cdbd, + 0x7b4ad5cdd24a88ec, + 0x00f94051ee040543, + // 2^88 * 3 * B + 0x8d356b23c3d330b2, + 0xf21c8b9bb0471b06, + 0xb36c316c6e42b83c, + 0x07d79c7e8beab10d, + 0xd7ef93bb07af9753, + 0x583ed0cf3db766a7, + 0xce6998bf6e0b1ec5, + 0x47b7ffd25dd40452, + 0x87fbfb9cbc08dd12, + 0x8a066b3ae1eec29b, + 0x0d57242bdb1fc1bf, + 0x1c3520a35ea64bb6, + // 2^88 * 4 * B + 0x80d253a6bccba34a, + 0x3e61c3a13838219b, + 0x90c3b6019882e396, + 0x1c3d05775d0ee66f, + 0xcda86f40216bc059, + 0x1fbb231d12bcd87e, + 0xb4956a9e17c70990, + 0x38750c3b66d12e55, + 0x692ef1409422e51a, + 0xcbc0c73c2b5df671, + 
0x21014fe7744ce029, + 0x0621e2c7d330487c, + // 2^88 * 5 * B + 0xaf9860cc8259838d, + 0x90ea48c1c69f9adc, + 0x6526483765581e30, + 0x0007d6097bd3a5bc, + 0xb7ae1796b0dbf0f3, + 0x54dfafb9e17ce196, + 0x25923071e9aaa3b4, + 0x5d8e589ca1002e9d, + 0xc0bf1d950842a94b, + 0xb2d3c363588f2e3e, + 0x0a961438bb51e2ef, + 0x1583d7783c1cbf86, + // 2^88 * 6 * B + 0xeceea2ef5da27ae1, + 0x597c3a1455670174, + 0xc9a62a126609167a, + 0x252a5f2e81ed8f70, + 0x90034704cc9d28c7, + 0x1d1b679ef72cc58f, + 0x16e12b5fbe5b8726, + 0x4958064e83c5580a, + 0x0d2894265066e80d, + 0xfcc3f785307c8c6b, + 0x1b53da780c1112fd, + 0x079c170bd843b388, + // 2^88 * 7 * B + 0x0506ece464fa6fff, + 0xbee3431e6205e523, + 0x3579422451b8ea42, + 0x6dec05e34ac9fb00, + 0xcdd6cd50c0d5d056, + 0x9af7686dbb03573b, + 0x3ca6723ff3c3ef48, + 0x6768c0d7317b8acc, + 0x94b625e5f155c1b3, + 0x417bf3a7997b7b91, + 0xc22cbddc6d6b2600, + 0x51445e14ddcd52f4, + // 2^88 * 8 * B + 0x57502b4b3b144951, + 0x8e67ff6b444bbcb3, + 0xb8bd6927166385db, + 0x13186f31e39295c8, + 0x893147ab2bbea455, + 0x8c53a24f92079129, + 0x4b49f948be30f7a7, + 0x12e990086e4fd43d, + 0xf10c96b37fdfbb2e, + 0x9f9a935e121ceaf9, + 0xdf1136c43a5b983f, + 0x77b2e3f05d3e99af, + // 2^92 * 1 * B + 0xfd0d75879cf12657, + 0xe82fef94e53a0e29, + 0xcc34a7f05bbb4be7, + 0x0b251172a50c38a2, + 0x9532f48fcc5cd29b, + 0x2ba851bea3ce3671, + 0x32dacaa051122941, + 0x478d99d9350004f2, + 0x1d5ad94890bb02c0, + 0x50e208b10ec25115, + 0xa26a22894ef21702, + 0x4dc923343b524805, + // 2^92 * 2 * B + 0xe3828c400f8086b6, + 0x3f77e6f7979f0dc8, + 0x7ef6de304df42cb4, + 0x5265797cb6abd784, + 0x3ad3e3ebf36c4975, + 0xd75d25a537862125, + 0xe873943da025a516, + 0x6bbc7cb4c411c847, + 0x3c6f9cd1d4a50d56, + 0xb6244077c6feab7e, + 0x6ff9bf483580972e, + 0x00375883b332acfb, + // 2^92 * 3 * B + 0x0001b2cd28cb0940, + 0x63fb51a06f1c24c9, + 0xb5ad8691dcd5ca31, + 0x67238dbd8c450660, + 0xc98bec856c75c99c, + 0xe44184c000e33cf4, + 0x0a676b9bba907634, + 0x669e2cb571f379d7, + 0xcb116b73a49bd308, + 0x025aad6b2392729e, + 0xb4793efa3f55d9b1, + 
0x72a1056140678bb9, + // 2^92 * 4 * B + 0xa2b6812b1cc9249d, + 0x62866eee21211f58, + 0x2cb5c5b85df10ece, + 0x03a6b259e263ae00, + 0x0d8d2909e2e505b6, + 0x98ca78abc0291230, + 0x77ef5569a9b12327, + 0x7c77897b81439b47, + 0xf1c1b5e2de331cb5, + 0x5a9f5d8e15fca420, + 0x9fa438f17bd932b1, + 0x2a381bf01c6146e7, + // 2^92 * 5 * B + 0xac9b9879cfc811c1, + 0x8b7d29813756e567, + 0x50da4e607c70edfc, + 0x5dbca62f884400b6, + 0xf7c0be32b534166f, + 0x27e6ca6419cf70d4, + 0x934df7d7a957a759, + 0x5701461dabdec2aa, + 0x2c6747402c915c25, + 0x1bdcd1a80b0d340a, + 0x5e5601bd07b43f5f, + 0x2555b4e05539a242, + // 2^92 * 6 * B + 0x6fc09f5266ddd216, + 0xdce560a7c8e37048, + 0xec65939da2df62fd, + 0x7a869ae7e52ed192, + 0x78409b1d87e463d4, + 0xad4da95acdfb639d, + 0xec28773755259b9c, + 0x69c806e9c31230ab, + 0x7b48f57414bb3f22, + 0x68c7cee4aedccc88, + 0xed2f936179ed80be, + 0x25d70b885f77bc4b, + // 2^92 * 7 * B + 0x4151c3d9762bf4de, + 0x083f435f2745d82b, + 0x29775a2e0d23ddd5, + 0x138e3a6269a5db24, + 0x98459d29bb1ae4d4, + 0x56b9c4c739f954ec, + 0x832743f6c29b4b3e, + 0x21ea8e2798b6878a, + 0x87bef4b46a5a7b9c, + 0xd2299d1b5fc1d062, + 0x82409818dd321648, + 0x5c5abeb1e5a2e03d, + // 2^92 * 8 * B + 0x14722af4b73c2ddb, + 0xbc470c5f5a05060d, + 0x00943eac2581b02e, + 0x0e434b3b1f499c8f, + 0x02cde6de1306a233, + 0x7b5a52a2116f8ec7, + 0xe1c681f4c1163b5b, + 0x241d350660d32643, + 0x6be4404d0ebc52c7, + 0xae46233bb1a791f5, + 0x2aec170ed25db42b, + 0x1d8dfd966645d694, + // 2^96 * 1 * B + 0x296fa9c59c2ec4de, + 0xbc8b61bf4f84f3cb, + 0x1c7706d917a8f908, + 0x63b795fc7ad3255d, + 0xd598639c12ddb0a4, + 0xa5d19f30c024866b, + 0xd17c2f0358fce460, + 0x07a195152e095e8a, + 0xa8368f02389e5fc8, + 0x90433b02cf8de43b, + 0xafa1fd5dc5412643, + 0x3e8fe83d032f0137, + // 2^96 * 2 * B + 0x2f8b15b90570a294, + 0x94f2427067084549, + 0xde1c5ae161bbfd84, + 0x75ba3b797fac4007, + 0x08704c8de8efd13c, + 0xdfc51a8e33e03731, + 0xa59d5da51260cde3, + 0x22d60899a6258c86, + 0x6239dbc070cdd196, + 0x60fe8a8b6c7d8a9a, + 0xb38847bceb401260, + 0x0904d07b87779e5e, + // 
2^96 * 3 * B + 0xb4ce1fd4ddba919c, + 0xcf31db3ec74c8daa, + 0x2c63cc63ad86cc51, + 0x43e2143fbc1dde07, + 0xf4322d6648f940b9, + 0x06952f0cbd2d0c39, + 0x167697ada081f931, + 0x6240aacebaf72a6c, + 0xf834749c5ba295a0, + 0xd6947c5bca37d25a, + 0x66f13ba7e7c9316a, + 0x56bdaf238db40cac, + // 2^96 * 4 * B + 0x362ab9e3f53533eb, + 0x338568d56eb93d40, + 0x9e0e14521d5a5572, + 0x1d24a86d83741318, + 0x1310d36cc19d3bb2, + 0x062a6bb7622386b9, + 0x7c9b8591d7a14f5c, + 0x03aa31507e1e5754, + 0xf4ec7648ffd4ce1f, + 0xe045eaf054ac8c1c, + 0x88d225821d09357c, + 0x43b261dc9aeb4859, + // 2^96 * 5 * B + 0xe55b1e1988bb79bb, + 0xa09ed07dc17a359d, + 0xb02c2ee2603dea33, + 0x326055cf5b276bc2, + 0x19513d8b6c951364, + 0x94fe7126000bf47b, + 0x028d10ddd54f9567, + 0x02b4d5e242940964, + 0xb4a155cb28d18df2, + 0xeacc4646186ce508, + 0xc49cf4936c824389, + 0x27a6c809ae5d3410, + // 2^96 * 6 * B + 0x8ba6ebcd1f0db188, + 0x37d3d73a675a5be8, + 0xf22edfa315f5585a, + 0x2cb67174ff60a17e, + 0xcd2c270ac43d6954, + 0xdd4a3e576a66cab2, + 0x79fa592469d7036c, + 0x221503603d8c2599, + 0x59eecdf9390be1d0, + 0xa9422044728ce3f1, + 0x82891c667a94f0f4, + 0x7b1df4b73890f436, + // 2^96 * 7 * B + 0xe492f2e0b3b2a224, + 0x7c6c9e062b551160, + 0x15eb8fe20d7f7b0e, + 0x61fcef2658fc5992, + 0x5f2e221807f8f58c, + 0xe3555c9fd49409d4, + 0xb2aaa88d1fb6a630, + 0x68698245d352e03d, + 0xdbb15d852a18187a, + 0xf3e4aad386ddacd7, + 0x44bae2810ff6c482, + 0x46cf4c473daf01cf, + // 2^96 * 8 * B + 0x426525ed9ec4e5f9, + 0x0e5eda0116903303, + 0x72b1a7f2cbe5cadc, + 0x29387bcd14eb5f40, + 0x213c6ea7f1498140, + 0x7c1e7ef8392b4854, + 0x2488c38c5629ceba, + 0x1065aae50d8cc5bb, + 0x1c2c4525df200d57, + 0x5c3b2dd6bfca674a, + 0x0a07e7b1e1834030, + 0x69a198e64f1ce716, + // 2^100 * 1 * B + 0x7afcd613efa9d697, + 0x0cc45aa41c067959, + 0xa56fe104c1fada96, + 0x3a73b70472e40365, + 0x7b26e56b9e2d4734, + 0xc4c7132b81c61675, + 0xef5c9525ec9cde7f, + 0x39c80b16e71743ad, + 0x0f196e0d1b826c68, + 0xf71ff0e24960e3db, + 0x6113167023b7436c, + 0x0cf0ea5877da7282, + // 2^100 * 2 * B + 
0x196c80a4ddd4ccbd, + 0x22e6f55d95f2dd9d, + 0xc75e33c740d6c71b, + 0x7bb51279cb3c042f, + 0xe332ced43ba6945a, + 0xde0b1361e881c05d, + 0x1ad40f095e67ed3b, + 0x5da8acdab8c63d5d, + 0xc4b6664a3a70159f, + 0x76194f0f0a904e14, + 0xa5614c39a4096c13, + 0x6cd0ff50979feced, + // 2^100 * 3 * B + 0xc0e067e78f4428ac, + 0x14835ab0a61135e3, + 0xf21d14f338062935, + 0x6390a4c8df04849c, + 0x7fecfabdb04ba18e, + 0xd0fc7bfc3bddbcf7, + 0xa41d486e057a131c, + 0x641a4391f2223a61, + 0xc5c6b95aa606a8db, + 0x914b7f9eb06825f1, + 0x2a731f6b44fc9eff, + 0x30ddf38562705cfc, + // 2^100 * 4 * B + 0x4e3dcbdad1bff7f9, + 0xc9118e8220645717, + 0xbacccebc0f189d56, + 0x1b4822e9d4467668, + 0x33bef2bd68bcd52c, + 0xc649dbb069482ef2, + 0xb5b6ee0c41cb1aee, + 0x5c294d270212a7e5, + 0xab360a7f25563781, + 0x2512228a480f7958, + 0xc75d05276114b4e3, + 0x222d9625d976fe2a, + // 2^100 * 5 * B + 0x1c717f85b372ace1, + 0x81930e694638bf18, + 0x239cad056bc08b58, + 0x0b34271c87f8fff4, + 0x0f94be7e0a344f85, + 0xeb2faa8c87f22c38, + 0x9ce1e75e4ee16f0f, + 0x43e64e5418a08dea, + 0x8155e2521a35ce63, + 0xbe100d4df912028e, + 0xbff80bf8a57ddcec, + 0x57342dc96d6bc6e4, + // 2^100 * 6 * B + 0xefeef065c8ce5998, + 0xbf029510b5cbeaa2, + 0x8c64a10620b7c458, + 0x35134fb231c24855, + 0xf3c3bcb71e707bf6, + 0x351d9b8c7291a762, + 0x00502e6edad69a33, + 0x522f521f1ec8807f, + 0x272c1f46f9a3902b, + 0xc91ba3b799657bcc, + 0xae614b304f8a1c0e, + 0x7afcaad70b99017b, + // 2^100 * 7 * B + 0xc25ded54a4b8be41, + 0x902d13e11bb0e2dd, + 0x41f43233cde82ab2, + 0x1085faa5c3aae7cb, + 0xa88141ecef842b6b, + 0x55e7b14797abe6c5, + 0x8c748f9703784ffe, + 0x5b50a1f7afcd00b7, + 0x9b840f66f1361315, + 0x18462242701003e9, + 0x65ed45fae4a25080, + 0x0a2862393fda7320, + // 2^100 * 8 * B + 0x46ab13c8347cbc9d, + 0x3849e8d499c12383, + 0x4cea314087d64ac9, + 0x1f354134b1a29ee7, + 0x960e737b6ecb9d17, + 0xfaf24948d67ceae1, + 0x37e7a9b4d55e1b89, + 0x5cb7173cb46c59eb, + 0x4a89e68b82b7abf0, + 0xf41cd9279ba6b7b9, + 0x16e6c210e18d876f, + 0x7cacdb0f7f1b09c6, + // 2^104 * 1 * B + 
0x9062b2e0d91a78bc, + 0x47c9889cc8509667, + 0x9df54a66405070b8, + 0x7369e6a92493a1bf, + 0xe1014434dcc5caed, + 0x47ed5d963c84fb33, + 0x70019576ed86a0e7, + 0x25b2697bd267f9e4, + 0x9d673ffb13986864, + 0x3ca5fbd9415dc7b8, + 0xe04ecc3bdf273b5e, + 0x1420683db54e4cd2, + // 2^104 * 2 * B + 0xb478bd1e249dd197, + 0x620c35005e58c102, + 0xfb02d32fccbaac5c, + 0x60b63bebf508a72d, + 0x34eebb6fc1cc5ad0, + 0x6a1b0ce99646ac8b, + 0xd3b0da49a66bde53, + 0x31e83b4161d081c1, + 0x97e8c7129e062b4f, + 0x49e48f4f29320ad8, + 0x5bece14b6f18683f, + 0x55cf1eb62d550317, + // 2^104 * 3 * B + 0x5879101065c23d58, + 0x8b9d086d5094819c, + 0xe2402fa912c55fa7, + 0x669a6564570891d4, + 0x3076b5e37df58c52, + 0xd73ab9dde799cc36, + 0xbd831ce34913ee20, + 0x1a56fbaa62ba0133, + 0x943e6b505c9dc9ec, + 0x302557bba77c371a, + 0x9873ae5641347651, + 0x13c4836799c58a5c, + // 2^104 * 4 * B + 0x423a5d465ab3e1b9, + 0xfc13c187c7f13f61, + 0x19f83664ecb5b9b6, + 0x66f80c93a637b607, + 0xc4dcfb6a5d8bd080, + 0xdeebc4ec571a4842, + 0xd4b2e883b8e55365, + 0x50bdc87dc8e5b827, + 0x606d37836edfe111, + 0x32353e15f011abd9, + 0x64b03ac325b73b96, + 0x1dd56444725fd5ae, + // 2^104 * 5 * B + 0x8fa47ff83362127d, + 0xbc9f6ac471cd7c15, + 0x6e71454349220c8b, + 0x0e645912219f732e, + 0xc297e60008bac89a, + 0x7d4cea11eae1c3e0, + 0xf3e38be19fe7977c, + 0x3a3a450f63a305cd, + 0x078f2f31d8394627, + 0x389d3183de94a510, + 0xd1e36c6d17996f80, + 0x318c8d9393a9a87b, + // 2^104 * 6 * B + 0xf2745d032afffe19, + 0x0c9f3c497f24db66, + 0xbc98d3e3ba8598ef, + 0x224c7c679a1d5314, + 0x5d669e29ab1dd398, + 0xfc921658342d9e3b, + 0x55851dfdf35973cd, + 0x509a41c325950af6, + 0xbdc06edca6f925e9, + 0x793ef3f4641b1f33, + 0x82ec12809d833e89, + 0x05bff02328a11389, + // 2^104 * 7 * B + 0x3632137023cae00b, + 0x544acf0ad1accf59, + 0x96741049d21a1c88, + 0x780b8cc3fa2a44a7, + 0x6881a0dd0dc512e4, + 0x4fe70dc844a5fafe, + 0x1f748e6b8f4a5240, + 0x576277cdee01a3ea, + 0x1ef38abc234f305f, + 0x9a577fbd1405de08, + 0x5e82a51434e62a0d, + 0x5ff418726271b7a1, + // 2^104 * 8 * B + 
0x398e080c1789db9d, + 0xa7602025f3e778f5, + 0xfa98894c06bd035d, + 0x106a03dc25a966be, + 0xe5db47e813b69540, + 0xf35d2a3b432610e1, + 0xac1f26e938781276, + 0x29d4db8ca0a0cb69, + 0xd9ad0aaf333353d0, + 0x38669da5acd309e5, + 0x3c57658ac888f7f0, + 0x4ab38a51052cbefa, + // 2^108 * 1 * B + 0xdfdacbee4324c0e9, + 0x054442883f955bb7, + 0xdef7aaa8ea31609f, + 0x68aee70642287cff, + 0xf68fe2e8809de054, + 0xe3bc096a9c82bad1, + 0x076353d40aadbf45, + 0x7b9b1fb5dea1959e, + 0xf01cc8f17471cc0c, + 0x95242e37579082bb, + 0x27776093d3e46b5f, + 0x2d13d55a28bd85fb, + // 2^108 * 2 * B + 0xfac5d2065b35b8da, + 0xa8da8a9a85624bb7, + 0xccd2ca913d21cd0f, + 0x6b8341ee8bf90d58, + 0xbf019cce7aee7a52, + 0xa8ded2b6e454ead3, + 0x3c619f0b87a8bb19, + 0x3619b5d7560916d8, + 0x3579f26b0282c4b2, + 0x64d592f24fafefae, + 0xb7cded7b28c8c7c0, + 0x6a927b6b7173a8d7, + // 2^108 * 3 * B + 0x1f6db24f986e4656, + 0x1021c02ed1e9105b, + 0xf8ff3fff2cc0a375, + 0x1d2a6bf8c6c82592, + 0x8d7040863ece88eb, + 0xf0e307a980eec08c, + 0xac2250610d788fda, + 0x056d92a43a0d478d, + 0x1b05a196fc3da5a1, + 0x77d7a8c243b59ed0, + 0x06da3d6297d17918, + 0x66fbb494f12353f7, + // 2^108 * 4 * B + 0x751a50b9d85c0fb8, + 0xd1afdc258bcf097b, + 0x2f16a6a38309a969, + 0x14ddff9ee5b00659, + 0xd6d70996f12309d6, + 0xdbfb2385e9c3d539, + 0x46d602b0f7552411, + 0x270a0b0557843e0c, + 0x61ff0640a7862bcc, + 0x81cac09a5f11abfe, + 0x9047830455d12abb, + 0x19a4bde1945ae873, + // 2^108 * 5 * B + 0x9b9f26f520a6200a, + 0x64804443cf13eaf8, + 0x8a63673f8631edd3, + 0x72bbbce11ed39dc1, + 0x40c709dec076c49f, + 0x657bfaf27f3e53f6, + 0x40662331eca042c4, + 0x14b375487eb4df04, + 0xae853c94ab66dc47, + 0xeb62343edf762d6e, + 0xf08e0e186fb2f7d1, + 0x4f0b1c02700ab37a, + // 2^108 * 6 * B + 0xe1706787d81951fa, + 0xa10a2c8eb290c77b, + 0xe7382fa03ed66773, + 0x0a4d84710bcc4b54, + 0x79fd21ccc1b2e23f, + 0x4ae7c281453df52a, + 0xc8172ec9d151486b, + 0x68abe9443e0a7534, + 0xda12c6c407831dcb, + 0x0da230d74d5c510d, + 0x4ab1531e6bd404e1, + 0x4106b166bcf440ef, + // 2^108 * 7 * B + 
0x02e57a421cd23668, + 0x4ad9fb5d0eaef6fd, + 0x954e6727b1244480, + 0x7f792f9d2699f331, + 0xa485ccd539e4ecf2, + 0x5aa3f3ad0555bab5, + 0x145e3439937df82d, + 0x1238b51e1214283f, + 0x0b886b925fd4d924, + 0x60906f7a3626a80d, + 0xecd367b4b98abd12, + 0x2876beb1def344cf, + // 2^108 * 8 * B + 0xdc84e93563144691, + 0x632fe8a0d61f23f4, + 0x4caa800612a9a8d5, + 0x48f9dbfa0e9918d3, + 0xd594b3333a8a85f8, + 0x4ea37689e78d7d58, + 0x73bf9f455e8e351f, + 0x5507d7d2bc41ebb4, + 0x1ceb2903299572fc, + 0x7c8ccaa29502d0ee, + 0x91bfa43411cce67b, + 0x5784481964a831e7, + // 2^112 * 1 * B + 0xda7c2b256768d593, + 0x98c1c0574422ca13, + 0xf1a80bd5ca0ace1d, + 0x29cdd1adc088a690, + 0xd6cfd1ef5fddc09c, + 0xe82b3efdf7575dce, + 0x25d56b5d201634c2, + 0x3041c6bb04ed2b9b, + 0x0ff2f2f9d956e148, + 0xade797759f356b2e, + 0x1a4698bb5f6c025c, + 0x104bbd6814049a7b, + // 2^112 * 2 * B + 0x51f0fd3168f1ed67, + 0x2c811dcdd86f3bc2, + 0x44dc5c4304d2f2de, + 0x5be8cc57092a7149, + 0xa95d9a5fd67ff163, + 0xe92be69d4cc75681, + 0xb7f8024cde20f257, + 0x204f2a20fb072df5, + 0xc8143b3d30ebb079, + 0x7589155abd652e30, + 0x653c3c318f6d5c31, + 0x2570fb17c279161f, + // 2^112 * 3 * B + 0x3efa367f2cb61575, + 0xf5f96f761cd6026c, + 0xe8c7142a65b52562, + 0x3dcb65ea53030acd, + 0x192ea9550bb8245a, + 0xc8e6fba88f9050d1, + 0x7986ea2d88a4c935, + 0x241c5f91de018668, + 0x28d8172940de6caa, + 0x8fbf2cf022d9733a, + 0x16d7fcdd235b01d1, + 0x08420edd5fcdf0e5, + // 2^112 * 4 * B + 0xcdff20ab8362fa4a, + 0x57e118d4e21a3e6e, + 0xe3179617fc39e62b, + 0x0d9a53efbc1769fd, + 0x0358c34e04f410ce, + 0xb6135b5a276e0685, + 0x5d9670c7ebb91521, + 0x04d654f321db889c, + 0x5e7dc116ddbdb5d5, + 0x2954deb68da5dd2d, + 0x1cb608173334a292, + 0x4a7a4f2618991ad7, + // 2^112 * 5 * B + 0xf4a718025fb15f95, + 0x3df65f346b5c1b8f, + 0xcdfcf08500e01112, + 0x11b50c4cddd31848, + 0x24c3b291af372a4b, + 0x93da8270718147f2, + 0xdd84856486899ef2, + 0x4a96314223e0ee33, + 0xa6e8274408a4ffd6, + 0x738e177e9c1576d9, + 0x773348b63d02b3f2, + 0x4f4bce4dce6bcc51, + // 2^112 * 6 * B + 
0xa71fce5ae2242584, + 0x26ea725692f58a9e, + 0xd21a09d71cea3cf4, + 0x73fcdd14b71c01e6, + 0x30e2616ec49d0b6f, + 0xe456718fcaec2317, + 0x48eb409bf26b4fa6, + 0x3042cee561595f37, + 0x427e7079449bac41, + 0x855ae36dbce2310a, + 0x4cae76215f841a7c, + 0x389e740c9a9ce1d6, + // 2^112 * 7 * B + 0x64fcb3ae34dcb9ce, + 0x97500323e348d0ad, + 0x45b3f07d62c6381b, + 0x61545379465a6788, + 0xc9bd78f6570eac28, + 0xe55b0b3227919ce1, + 0x65fc3eaba19b91ed, + 0x25c425e5d6263690, + 0x3f3e06a6f1d7de6e, + 0x3ef976278e062308, + 0x8c14f6264e8a6c77, + 0x6539a08915484759, + // 2^112 * 8 * B + 0xe9d21f74c3d2f773, + 0xc150544125c46845, + 0x624e5ce8f9b99e33, + 0x11c5e4aac5cd186c, + 0xddc4dbd414bb4a19, + 0x19b2bc3c98424f8e, + 0x48a89fd736ca7169, + 0x0f65320ef019bd90, + 0xd486d1b1cafde0c6, + 0x4f3fe6e3163b5181, + 0x59a8af0dfaf2939a, + 0x4cabc7bdec33072a, + // 2^116 * 1 * B + 0x16faa8fb532f7428, + 0xdbd42ea046a4e272, + 0x5337653b8b9ea480, + 0x4065947223973f03, + 0xf7c0a19c1a54a044, + 0x4a1c5e2477bd9fbb, + 0xa6e3ca115af22972, + 0x1819bb953f2e9e0d, + 0x498fbb795e042e84, + 0x7d0dd89a7698b714, + 0x8bfb0ba427fe6295, + 0x36ba82e721200524, + // 2^116 * 2 * B + 0xd60ecbb74245ec41, + 0xfd9be89e34348716, + 0xc9240afee42284de, + 0x4472f648d0531db4, + 0xc8d69d0a57274ed5, + 0x45ba803260804b17, + 0xdf3cda102255dfac, + 0x77d221232709b339, + 0x498a6d7064ad94d8, + 0xa5b5c8fd9af62263, + 0x8ca8ed0545c141f4, + 0x2c63bec3662d358c, + // 2^116 * 3 * B + 0x7fe60d8bea787955, + 0xb9dc117eb5f401b7, + 0x91c7c09a19355cce, + 0x22692ef59442bedf, + 0x9a518b3a8586f8bf, + 0x9ee71af6cbb196f0, + 0xaa0625e6a2385cf2, + 0x1deb2176ddd7c8d1, + 0x8563d19a2066cf6c, + 0x401bfd8c4dcc7cd7, + 0xd976a6becd0d8f62, + 0x67cfd773a278b05e, + // 2^116 * 4 * B + 0x8dec31faef3ee475, + 0x99dbff8a9e22fd92, + 0x512d11594e26cab1, + 0x0cde561eec4310b9, + 0x2d5fa9855a4e586a, + 0x65f8f7a449beab7e, + 0xaa074dddf21d33d3, + 0x185cba721bcb9dee, + 0x93869da3f4e3cb41, + 0xbf0392f540f7977e, + 0x026204fcd0463b83, + 0x3ec91a769eec6eed, + // 2^116 * 5 * B + 
0x1e9df75bf78166ad, + 0x4dfda838eb0cd7af, + 0xba002ed8c1eaf988, + 0x13fedb3e11f33cfc, + 0x0fad2fb7b0a3402f, + 0x46615ecbfb69f4a8, + 0xf745bcc8c5f8eaa6, + 0x7a5fa8794a94e896, + 0x52958faa13cd67a1, + 0x965ee0818bdbb517, + 0x16e58daa2e8845b3, + 0x357d397d5499da8f, + // 2^116 * 6 * B + 0x1ebfa05fb0bace6c, + 0xc934620c1caf9a1e, + 0xcc771cc41d82b61a, + 0x2d94a16aa5f74fec, + 0x481dacb4194bfbf8, + 0x4d77e3f1bae58299, + 0x1ef4612e7d1372a0, + 0x3a8d867e70ff69e1, + 0x6f58cd5d55aff958, + 0xba3eaa5c75567721, + 0x75c123999165227d, + 0x69be1343c2f2b35e, + // 2^116 * 7 * B + 0x0e091d5ee197c92a, + 0x4f51019f2945119f, + 0x143679b9f034e99c, + 0x7d88112e4d24c696, + 0x82bbbdac684b8de3, + 0xa2f4c7d03fca0718, + 0x337f92fbe096aaa8, + 0x200d4d8c63587376, + 0x208aed4b4893b32b, + 0x3efbf23ebe59b964, + 0xd762deb0dba5e507, + 0x69607bd681bd9d94, + // 2^116 * 8 * B + 0xf6be021068de1ce1, + 0xe8d518e70edcbc1f, + 0xe3effdd01b5505a5, + 0x35f63353d3ec3fd0, + 0x3b7f3bd49323a902, + 0x7c21b5566b2c6e53, + 0xe5ba8ff53a7852a7, + 0x28bc77a5838ece00, + 0x63ba78a8e25d8036, + 0x63651e0094333490, + 0x48d82f20288ce532, + 0x3a31abfa36b57524, + // 2^120 * 1 * B + 0x239e9624089c0a2e, + 0xc748c4c03afe4738, + 0x17dbed2a764fa12a, + 0x639b93f0321c8582, + 0xc08f788f3f78d289, + 0xfe30a72ca1404d9f, + 0xf2778bfccf65cc9d, + 0x7ee498165acb2021, + 0x7bd508e39111a1c3, + 0x2b2b90d480907489, + 0xe7d2aec2ae72fd19, + 0x0edf493c85b602a6, + // 2^120 * 2 * B + 0xaecc8158599b5a68, + 0xea574f0febade20e, + 0x4fe41d7422b67f07, + 0x403b92e3019d4fb4, + 0x6767c4d284764113, + 0xa090403ff7f5f835, + 0x1c8fcffacae6bede, + 0x04c00c54d1dfa369, + 0x4dc22f818b465cf8, + 0x71a0f35a1480eff8, + 0xaee8bfad04c7d657, + 0x355bb12ab26176f4, + // 2^120 * 3 * B + 0xa71e64cc7493bbf4, + 0xe5bd84d9eca3b0c3, + 0x0a6bc50cfa05e785, + 0x0f9b8132182ec312, + 0xa301dac75a8c7318, + 0xed90039db3ceaa11, + 0x6f077cbf3bae3f2d, + 0x7518eaf8e052ad8e, + 0xa48859c41b7f6c32, + 0x0f2d60bcf4383298, + 0x1815a929c9b1d1d9, + 0x47c3871bbb1755c4, + // 2^120 * 4 * B + 
0x5144539771ec4f48, + 0xf805b17dc98c5d6e, + 0xf762c11a47c3c66b, + 0x00b89b85764699dc, + 0xfbe65d50c85066b0, + 0x62ecc4b0b3a299b0, + 0xe53754ea441ae8e0, + 0x08fea02ce8d48d5f, + 0x824ddd7668deead0, + 0xc86445204b685d23, + 0xb514cfcd5d89d665, + 0x473829a74f75d537, + // 2^120 * 5 * B + 0x82d2da754679c418, + 0xe63bd7d8b2618df0, + 0x355eef24ac47eb0a, + 0x2078684c4833c6b4, + 0x23d9533aad3902c9, + 0x64c2ddceef03588f, + 0x15257390cfe12fb4, + 0x6c668b4d44e4d390, + 0x3b48cf217a78820c, + 0xf76a0ab281273e97, + 0xa96c65a78c8eed7b, + 0x7411a6054f8a433f, + // 2^120 * 6 * B + 0x4d659d32b99dc86d, + 0x044cdc75603af115, + 0xb34c712cdcc2e488, + 0x7c136574fb8134ff, + 0x579ae53d18b175b4, + 0x68713159f392a102, + 0x8455ecba1eef35f5, + 0x1ec9a872458c398f, + 0xb8e6a4d400a2509b, + 0x9b81d7020bc882b4, + 0x57e7cc9bf1957561, + 0x3add88a5c7cd6460, + // 2^120 * 7 * B + 0xab895770b635dcf2, + 0x02dfef6cf66c1fbc, + 0x85530268beb6d187, + 0x249929fccc879e74, + 0x85c298d459393046, + 0x8f7e35985ff659ec, + 0x1d2ca22af2f66e3a, + 0x61ba1131a406a720, + 0xa3d0a0f116959029, + 0x023b6b6cba7ebd89, + 0x7bf15a3e26783307, + 0x5620310cbbd8ece7, + // 2^120 * 8 * B + 0x528993434934d643, + 0xb9dbf806a51222f5, + 0x8f6d878fc3f41c22, + 0x37676a2a4d9d9730, + 0x6646b5f477e285d6, + 0x40e8ff676c8f6193, + 0xa6ec7311abb594dd, + 0x7ec846f3658cec4d, + 0x9b5e8f3f1da22ec7, + 0x130f1d776c01cd13, + 0x214c8fcfa2989fb8, + 0x6daaf723399b9dd5, + // 2^124 * 1 * B + 0x591e4a5610628564, + 0x2a4bb87ca8b4df34, + 0xde2a2572e7a38e43, + 0x3cbdabd9fee5046e, + 0x81aebbdd2cd13070, + 0x962e4325f85a0e9e, + 0xde9391aacadffecb, + 0x53177fda52c230e6, + 0xa7bc970650b9de79, + 0x3d12a7fbc301b59b, + 0x02652e68d36ae38c, + 0x79d739835a6199dc, + // 2^124 * 2 * B + 0xd9354df64131c1bd, + 0x758094a186ec5822, + 0x4464ee12e459f3c2, + 0x6c11fce4cb133282, + 0x21c9d9920d591737, + 0x9bea41d2e9b46cd6, + 0xe20e84200d89bfca, + 0x79d99f946eae5ff8, + 0xf17b483568673205, + 0x387deae83caad96c, + 0x61b471fd56ffe386, + 0x31741195b745a599, + // 2^124 * 3 * B + 
0xe8d10190b77a360b, + 0x99b983209995e702, + 0xbd4fdff8fa0247aa, + 0x2772e344e0d36a87, + 0x17f8ba683b02a047, + 0x50212096feefb6c8, + 0x70139be21556cbe2, + 0x203e44a11d98915b, + 0xd6863eba37b9e39f, + 0x105bc169723b5a23, + 0x104f6459a65c0762, + 0x567951295b4d38d4, + // 2^124 * 4 * B + 0x535fd60613037524, + 0xe210adf6b0fbc26a, + 0xac8d0a9b23e990ae, + 0x47204d08d72fdbf9, + 0x07242eb30d4b497f, + 0x1ef96306b9bccc87, + 0x37950934d8116f45, + 0x05468d6201405b04, + 0x00f565a9f93267de, + 0xcecfd78dc0d58e8a, + 0xa215e2dcf318e28e, + 0x4599ee919b633352, + // 2^124 * 5 * B + 0xd3c220ca70e0e76b, + 0xb12bea58ea9f3094, + 0x294ddec8c3271282, + 0x0c3539e1a1d1d028, + 0xac746d6b861ae579, + 0x31ab0650f6aea9dc, + 0x241d661140256d4c, + 0x2f485e853d21a5de, + 0x329744839c0833f3, + 0x6fe6257fd2abc484, + 0x5327d1814b358817, + 0x65712585893fe9bc, + // 2^124 * 6 * B + 0x9c102fb732a61161, + 0xe48e10dd34d520a8, + 0x365c63546f9a9176, + 0x32f6fe4c046f6006, + 0x81c29f1bd708ee3f, + 0xddcb5a05ae6407d0, + 0x97aec1d7d2a3eba7, + 0x1590521a91d50831, + 0x40a3a11ec7910acc, + 0x9013dff8f16d27ae, + 0x1a9720d8abb195d4, + 0x1bb9fe452ea98463, + // 2^124 * 7 * B + 0xe9d1d950b3d54f9e, + 0x2d5f9cbee00d33c1, + 0x51c2c656a04fc6ac, + 0x65c091ee3c1cbcc9, + 0xcf5e6c95cc36747c, + 0x294201536b0bc30d, + 0x453ac67cee797af0, + 0x5eae6ab32a8bb3c9, + 0x7083661114f118ea, + 0x2b37b87b94349cad, + 0x7273f51cb4e99f40, + 0x78a2a95823d75698, + // 2^124 * 8 * B + 0xa2b072e95c8c2ace, + 0x69cffc96651e9c4b, + 0x44328ef842e7b42b, + 0x5dd996c122aadeb3, + 0xb4f23c425ef83207, + 0xabf894d3c9a934b5, + 0xd0708c1339fd87f7, + 0x1876789117166130, + 0x925b5ef0670c507c, + 0x819bc842b93c33bf, + 0x10792e9a70dd003f, + 0x59ad4b7a6e28dc74, + // 2^128 * 1 * B + 0x5f3a7562eb3dbe47, + 0xf7ea38548ebda0b8, + 0x00c3e53145747299, + 0x1304e9e71627d551, + 0x583b04bfacad8ea2, + 0x29b743e8148be884, + 0x2b1e583b0810c5db, + 0x2b5449e58eb3bbaa, + 0x789814d26adc9cfe, + 0x3c1bab3f8b48dd0b, + 0xda0fe1fff979c60a, + 0x4468de2d7c2dd693, + // 2^128 * 2 * B + 
0x51bb355e9419469e, + 0x33e6dc4c23ddc754, + 0x93a5b6d6447f9962, + 0x6cce7c6ffb44bd63, + 0x4b9ad8c6f86307ce, + 0x21113531435d0c28, + 0xd4a866c5657a772c, + 0x5da6427e63247352, + 0x1a94c688deac22ca, + 0xb9066ef7bbae1ff8, + 0x88ad8c388d59580f, + 0x58f29abfe79f2ca8, + // 2^128 * 3 * B + 0xe90ecfab8de73e68, + 0x54036f9f377e76a5, + 0xf0495b0bbe015982, + 0x577629c4a7f41e36, + 0x4b5a64bf710ecdf6, + 0xb14ce538462c293c, + 0x3643d056d50b3ab9, + 0x6af93724185b4870, + 0x3220024509c6a888, + 0xd2e036134b558973, + 0x83e236233c33289f, + 0x701f25bb0caec18f, + // 2^128 * 4 * B + 0xc3a8b0f8e4616ced, + 0xf700660e9e25a87d, + 0x61e3061ff4bca59c, + 0x2e0c92bfbdc40be9, + 0x9d18f6d97cbec113, + 0x844a06e674bfdbe4, + 0x20f5b522ac4e60d6, + 0x720a5bc050955e51, + 0x0c3f09439b805a35, + 0xe84e8b376242abfc, + 0x691417f35c229346, + 0x0e9b9cbb144ef0ec, + // 2^128 * 5 * B + 0xfbbad48ffb5720ad, + 0xee81916bdbf90d0e, + 0xd4813152635543bf, + 0x221104eb3f337bd8, + 0x8dee9bd55db1beee, + 0xc9c3ab370a723fb9, + 0x44a8f1bf1c68d791, + 0x366d44191cfd3cde, + 0x9e3c1743f2bc8c14, + 0x2eda26fcb5856c3b, + 0xccb82f0e68a7fb97, + 0x4167a4e6bc593244, + // 2^128 * 6 * B + 0x643b9d2876f62700, + 0x5d1d9d400e7668eb, + 0x1b4b430321fc0684, + 0x7938bb7e2255246a, + 0xc2be2665f8ce8fee, + 0xe967ff14e880d62c, + 0xf12e6e7e2f364eee, + 0x34b33370cb7ed2f6, + 0xcdc591ee8681d6cc, + 0xce02109ced85a753, + 0xed7485c158808883, + 0x1176fc6e2dfe65e4, + // 2^128 * 7 * B + 0xb4af6cd05b9c619b, + 0x2ddfc9f4b2a58480, + 0x3d4fa502ebe94dc4, + 0x08fc3a4c677d5f34, + 0xdb90e28949770eb8, + 0x98fbcc2aacf440a3, + 0x21354ffeded7879b, + 0x1f6a3e54f26906b6, + 0x60a4c199d30734ea, + 0x40c085b631165cd6, + 0xe2333e23f7598295, + 0x4f2fad0116b900d1, + // 2^128 * 8 * B + 0x44beb24194ae4e54, + 0x5f541c511857ef6c, + 0xa61e6b2d368d0498, + 0x445484a4972ef7ab, + 0x962cd91db73bb638, + 0xe60577aafc129c08, + 0x6f619b39f3b61689, + 0x3451995f2944ee81, + 0x9152fcd09fea7d7c, + 0x4a816c94b0935cf6, + 0x258e9aaa47285c40, + 0x10b89ca6042893b7, + // 2^132 * 1 * B + 
0x9b2a426e3b646025, + 0x32127190385ce4cf, + 0xa25cffc2dd6dea45, + 0x06409010bea8de75, + 0xd67cded679d34aa0, + 0xcc0b9ec0cc4db39f, + 0xa535a456e35d190f, + 0x2e05d9eaf61f6fef, + 0xc447901ad61beb59, + 0x661f19bce5dc880a, + 0x24685482b7ca6827, + 0x293c778cefe07f26, + // 2^132 * 2 * B + 0x86809e7007069096, + 0xaad75b15e4e50189, + 0x07f35715a21a0147, + 0x0487f3f112815d5e, + 0x16c795d6a11ff200, + 0xcb70d0e2b15815c9, + 0x89f293209b5395b5, + 0x50b8c2d031e47b4f, + 0x48350c08068a4962, + 0x6ffdd05351092c9a, + 0x17af4f4aaf6fc8dd, + 0x4b0553b53cdba58b, + // 2^132 * 3 * B + 0x9c65fcbe1b32ff79, + 0xeb75ea9f03b50f9b, + 0xfced2a6c6c07e606, + 0x35106cd551717908, + 0xbf05211b27c152d4, + 0x5ec26849bd1af639, + 0x5e0b2caa8e6fab98, + 0x054c8bdd50bd0840, + 0x38a0b12f1dcf073d, + 0x4b60a8a3b7f6a276, + 0xfed5ac25d3404f9a, + 0x72e82d5e5505c229, + // 2^132 * 4 * B + 0x6b0b697ff0d844c8, + 0xbb12f85cd979cb49, + 0xd2a541c6c1da0f1f, + 0x7b7c242958ce7211, + 0x00d9cdfd69771d02, + 0x410276cd6cfbf17e, + 0x4c45306c1cb12ec7, + 0x2857bf1627500861, + 0x9f21903f0101689e, + 0xd779dfd3bf861005, + 0xa122ee5f3deb0f1b, + 0x510df84b485a00d4, + // 2^132 * 5 * B + 0xa54133bb9277a1fa, + 0x74ec3b6263991237, + 0x1a3c54dc35d2f15a, + 0x2d347144e482ba3a, + 0x24b3c887c70ac15e, + 0xb0f3a557fb81b732, + 0x9b2cde2fe578cc1b, + 0x4cf7ed0703b54f8e, + 0x6bd47c6598fbee0f, + 0x9e4733e2ab55be2d, + 0x1093f624127610c5, + 0x4e05e26ad0a1eaa4, + // 2^132 * 6 * B + 0xda9b6b624b531f20, + 0x429a760e77509abb, + 0xdbe9f522e823cb80, + 0x618f1856880c8f82, + 0x1833c773e18fe6c0, + 0xe3c4711ad3c87265, + 0x3bfd3c4f0116b283, + 0x1955875eb4cd4db8, + 0x6da6de8f0e399799, + 0x7ad61aa440fda178, + 0xb32cd8105e3563dd, + 0x15f6beae2ae340ae, + // 2^132 * 7 * B + 0x862bcb0c31ec3a62, + 0x810e2b451138f3c2, + 0x788ec4b839dac2a4, + 0x28f76867ae2a9281, + 0xba9a0f7b9245e215, + 0xf368612dd98c0dbb, + 0x2e84e4cbf220b020, + 0x6ba92fe962d90eda, + 0x3e4df9655884e2aa, + 0xbd62fbdbdbd465a5, + 0xd7596caa0de9e524, + 0x6e8042ccb2b1b3d7, + // 2^132 * 8 * B + 
0xf10d3c29ce28ca6e, + 0xbad34540fcb6093d, + 0xe7426ed7a2ea2d3f, + 0x08af9d4e4ff298b9, + 0x1530653616521f7e, + 0x660d06b896203dba, + 0x2d3989bc545f0879, + 0x4b5303af78ebd7b0, + 0x72f8a6c3bebcbde8, + 0x4f0fca4adc3a8e89, + 0x6fa9d4e8c7bfdf7a, + 0x0dcf2d679b624eb7, + // 2^136 * 1 * B + 0x3d5947499718289c, + 0x12ebf8c524533f26, + 0x0262bfcb14c3ef15, + 0x20b878d577b7518e, + 0x753941be5a45f06e, + 0xd07caeed6d9c5f65, + 0x11776b9c72ff51b6, + 0x17d2d1d9ef0d4da9, + 0x27f2af18073f3e6a, + 0xfd3fe519d7521069, + 0x22e3b72c3ca60022, + 0x72214f63cc65c6a7, + // 2^136 * 2 * B + 0xb4e37f405307a693, + 0xaba714d72f336795, + 0xd6fbd0a773761099, + 0x5fdf48c58171cbc9, + 0x1d9db7b9f43b29c9, + 0xd605824a4f518f75, + 0xf2c072bd312f9dc4, + 0x1f24ac855a1545b0, + 0x24d608328e9505aa, + 0x4748c1d10c1420ee, + 0xc7ffe45c06fb25a2, + 0x00ba739e2ae395e6, + // 2^136 * 3 * B + 0x592e98de5c8790d6, + 0xe5bfb7d345c2a2df, + 0x115a3b60f9b49922, + 0x03283a3e67ad78f3, + 0xae4426f5ea88bb26, + 0x360679d984973bfb, + 0x5c9f030c26694e50, + 0x72297de7d518d226, + 0x48241dc7be0cb939, + 0x32f19b4d8b633080, + 0xd3dfc90d02289308, + 0x05e1296846271945, + // 2^136 * 4 * B + 0xba82eeb32d9c495a, + 0xceefc8fcf12bb97c, + 0xb02dabae93b5d1e0, + 0x39c00c9c13698d9b, + 0xadbfbbc8242c4550, + 0xbcc80cecd03081d9, + 0x843566a6f5c8df92, + 0x78cf25d38258ce4c, + 0x15ae6b8e31489d68, + 0xaa851cab9c2bf087, + 0xc9a75a97f04efa05, + 0x006b52076b3ff832, + // 2^136 * 5 * B + 0x29e0cfe19d95781c, + 0xb681df18966310e2, + 0x57df39d370516b39, + 0x4d57e3443bc76122, + 0xf5cb7e16b9ce082d, + 0x3407f14c417abc29, + 0xd4b36bce2bf4a7ab, + 0x7de2e9561a9f75ce, + 0xde70d4f4b6a55ecb, + 0x4801527f5d85db99, + 0xdbc9c440d3ee9a81, + 0x6b2a90af1a6029ed, + // 2^136 * 6 * B + 0x6923f4fc9ae61e97, + 0x5735281de03f5fd1, + 0xa764ae43e6edd12d, + 0x5fd8f4e9d12d3e4a, + 0x77ebf3245bb2d80a, + 0xd8301b472fb9079b, + 0xc647e6f24cee7333, + 0x465812c8276c2109, + 0x4d43beb22a1062d9, + 0x7065fb753831dc16, + 0x180d4a7bde2968d7, + 0x05b32c2b1cb16790, + // 2^136 * 7 * B + 
0xc8c05eccd24da8fd, + 0xa1cf1aac05dfef83, + 0xdbbeeff27df9cd61, + 0x3b5556a37b471e99, + 0xf7fca42c7ad58195, + 0x3214286e4333f3cc, + 0xb6c29d0d340b979d, + 0x31771a48567307e1, + 0x32b0c524e14dd482, + 0xedb351541a2ba4b6, + 0xa3d16048282b5af3, + 0x4fc079d27a7336eb, + // 2^136 * 8 * B + 0x51c938b089bf2f7f, + 0x2497bd6502dfe9a7, + 0xffffc09c7880e453, + 0x124567cecaf98e92, + 0xdc348b440c86c50d, + 0x1337cbc9cc94e651, + 0x6422f74d643e3cb9, + 0x241170c2bae3cd08, + 0x3ff9ab860ac473b4, + 0xf0911dee0113e435, + 0x4ae75060ebc6c4af, + 0x3f8612966c87000d, + // 2^140 * 1 * B + 0x0c9c5303f7957be4, + 0xa3c31a20e085c145, + 0xb0721d71d0850050, + 0x0aba390eab0bf2da, + 0x529fdffe638c7bf3, + 0xdf2b9e60388b4995, + 0xe027b34f1bad0249, + 0x7bc92fc9b9fa74ed, + 0x9f97ef2e801ad9f9, + 0x83697d5479afda3a, + 0xe906b3ffbd596b50, + 0x02672b37dd3fb8e0, + // 2^140 * 2 * B + 0x48b2ca8b260885e4, + 0xa4286bec82b34c1c, + 0x937e1a2617f58f74, + 0x741d1fcbab2ca2a5, + 0xee9ba729398ca7f5, + 0xeb9ca6257a4849db, + 0x29eb29ce7ec544e1, + 0x232ca21ef736e2c8, + 0xbf61423d253fcb17, + 0x08803ceafa39eb14, + 0xf18602df9851c7af, + 0x0400f3a049e3414b, + // 2^140 * 3 * B + 0xabce0476ba61c55b, + 0x36a3d6d7c4d39716, + 0x6eb259d5e8d82d09, + 0x0c9176e984d756fb, + 0x2efba412a06e7b06, + 0x146785452c8d2560, + 0xdf9713ebd67a91c7, + 0x32830ac7157eadf3, + 0x0e782a7ab73769e8, + 0x04a05d7875b18e2c, + 0x29525226ebcceae1, + 0x0d794f8383eba820, + // 2^140 * 4 * B + 0xff35f5cb9e1516f4, + 0xee805bcf648aae45, + 0xf0d73c2bb93a9ef3, + 0x097b0bf22092a6c2, + 0x7be44ce7a7a2e1ac, + 0x411fd93efad1b8b7, + 0x1734a1d70d5f7c9b, + 0x0d6592233127db16, + 0xc48bab1521a9d733, + 0xa6c2eaead61abb25, + 0x625c6c1cc6cb4305, + 0x7fc90fea93eb3a67, + // 2^140 * 5 * B + 0x0408f1fe1f5c5926, + 0x1a8f2f5e3b258bf4, + 0x40a951a2fdc71669, + 0x6598ee93c98b577e, + 0xc527deb59c7cb23d, + 0x955391695328404e, + 0xd64392817ccf2c7a, + 0x6ce97dabf7d8fa11, + 0x25b5a8e50ef7c48f, + 0xeb6034116f2ce532, + 0xc5e75173e53de537, + 0x73119fa08c12bb03, + // 2^140 * 6 * B + 
0xed30129453f1a4cb, + 0xbce621c9c8f53787, + 0xfacb2b1338bee7b9, + 0x3025798a9ea8428c, + 0x7845b94d21f4774d, + 0xbf62f16c7897b727, + 0x671857c03c56522b, + 0x3cd6a85295621212, + 0x3fecde923aeca999, + 0xbdaa5b0062e8c12f, + 0x67b99dfc96988ade, + 0x3f52c02852661036, + // 2^140 * 7 * B + 0xffeaa48e2a1351c6, + 0x28624754fa7f53d7, + 0x0b5ba9e57582ddf1, + 0x60c0104ba696ac59, + 0x9258bf99eec416c6, + 0xac8a5017a9d2f671, + 0x629549ab16dea4ab, + 0x05d0e85c99091569, + 0x051de020de9cbe97, + 0xfa07fc56b50bcf74, + 0x378cec9f0f11df65, + 0x36853c69ab96de4d, + // 2^140 * 8 * B + 0x36d9b8de78f39b2d, + 0x7f42ed71a847b9ec, + 0x241cd1d679bd3fde, + 0x6a704fec92fbce6b, + 0x4433c0b0fac5e7be, + 0x724bae854c08dcbe, + 0xf1f24cc446978f9b, + 0x4a0aff6d62825fc8, + 0xe917fb9e61095301, + 0xc102df9402a092f8, + 0xbf09e2f5fa66190b, + 0x681109bee0dcfe37, + // 2^144 * 1 * B + 0x559a0cc9782a0dde, + 0x551dcdb2ea718385, + 0x7f62865b31ef238c, + 0x504aa7767973613d, + 0x9c18fcfa36048d13, + 0x29159db373899ddd, + 0xdc9f350b9f92d0aa, + 0x26f57eee878a19d4, + 0x0cab2cd55687efb1, + 0x5180d162247af17b, + 0x85c15a344f5a2467, + 0x4041943d9dba3069, + // 2^144 * 2 * B + 0xc3c0eeba43ebcc96, + 0x8d749c9c26ea9caf, + 0xd9fa95ee1c77ccc6, + 0x1420a1d97684340f, + 0x4b217743a26caadd, + 0x47a6b424648ab7ce, + 0xcb1d4f7a03fbc9e3, + 0x12d931429800d019, + 0x00c67799d337594f, + 0x5e3c5140b23aa47b, + 0x44182854e35ff395, + 0x1b4f92314359a012, + // 2^144 * 3 * B + 0x3e5c109d89150951, + 0x39cefa912de9696a, + 0x20eae43f975f3020, + 0x239b572a7f132dae, + 0x33cf3030a49866b1, + 0x251f73d2215f4859, + 0xab82aa4051def4f6, + 0x5ff191d56f9a23f6, + 0x819ed433ac2d9068, + 0x2883ab795fc98523, + 0xef4572805593eb3d, + 0x020c526a758f36cb, + // 2^144 * 4 * B + 0x779834f89ed8dbbc, + 0xc8f2aaf9dc7ca46c, + 0xa9524cdca3e1b074, + 0x02aacc4615313877, + 0xe931ef59f042cc89, + 0x2c589c9d8e124bb6, + 0xadc8e18aaec75997, + 0x452cfe0a5602c50c, + 0x86a0f7a0647877df, + 0xbbc464270e607c9f, + 0xab17ea25f1fb11c9, + 0x4cfb7d7b304b877b, + // 2^144 * 5 * B + 
0x72b43d6cb89b75fe, + 0x54c694d99c6adc80, + 0xb8c3aa373ee34c9f, + 0x14b4622b39075364, + 0xe28699c29789ef12, + 0x2b6ecd71df57190d, + 0xc343c857ecc970d0, + 0x5b1d4cbc434d3ac5, + 0xb6fb2615cc0a9f26, + 0x3a4f0e2bb88dcce5, + 0x1301498b3369a705, + 0x2f98f71258592dd1, + // 2^144 * 6 * B + 0x0c94a74cb50f9e56, + 0x5b1ff4a98e8e1320, + 0x9a2acc2182300f67, + 0x3a6ae249d806aaf9, + 0x2e12ae444f54a701, + 0xfcfe3ef0a9cbd7de, + 0xcebf890d75835de0, + 0x1d8062e9e7614554, + 0x657ada85a9907c5a, + 0x1a0ea8b591b90f62, + 0x8d0e1dfbdf34b4e9, + 0x298b8ce8aef25ff3, + // 2^144 * 7 * B + 0x2a927953eff70cb2, + 0x4b89c92a79157076, + 0x9418457a30a7cf6a, + 0x34b8a8404d5ce485, + 0x837a72ea0a2165de, + 0x3fab07b40bcf79f6, + 0x521636c77738ae70, + 0x6ba6271803a7d7dc, + 0xc26eecb583693335, + 0xd5a813df63b5fefd, + 0xa293aa9aa4b22573, + 0x71d62bdd465e1c6a, + // 2^144 * 8 * B + 0x6533cc28d378df80, + 0xf6db43790a0fa4b4, + 0xe3645ff9f701da5a, + 0x74d5f317f3172ba4, + 0xcd2db5dab1f75ef5, + 0xd77f95cf16b065f5, + 0x14571fea3f49f085, + 0x1c333621262b2b3d, + 0xa86fe55467d9ca81, + 0x398b7c752b298c37, + 0xda6d0892e3ac623b, + 0x4aebcc4547e9d98c, + // 2^148 * 1 * B + 0x53175a7205d21a77, + 0xb0c04422d3b934d4, + 0xadd9f24bdd5deadc, + 0x074f46e69f10ff8c, + 0x0de9b204a059a445, + 0xe15cb4aa4b17ad0f, + 0xe1bbec521f79c557, + 0x2633f1b9d071081b, + 0xc1fb4177018b9910, + 0xa6ea20dc6c0fe140, + 0xd661f3e74354c6ff, + 0x5ecb72e6f1a3407a, + // 2^148 * 2 * B + 0xa515a31b2259fb4e, + 0x0960f3972bcac52f, + 0xedb52fec8d3454cb, + 0x382e2720c476c019, + 0xfeeae106e8e86997, + 0x9863337f98d09383, + 0x9470480eaa06ebef, + 0x038b6898d4c5c2d0, + 0xf391c51d8ace50a6, + 0x3142d0b9ae2d2948, + 0xdb4d5a1a7f24ca80, + 0x21aeba8b59250ea8, + // 2^148 * 3 * B + 0x24f13b34cf405530, + 0x3c44ea4a43088af7, + 0x5dd5c5170006a482, + 0x118eb8f8890b086d, + 0x53853600f0087f23, + 0x4c461879da7d5784, + 0x6af303deb41f6860, + 0x0a3c16c5c27c18ed, + 0x17e49c17cc947f3d, + 0xccc6eda6aac1d27b, + 0xdf6092ceb0f08e56, + 0x4909b3e22c67c36b, + // 2^148 * 4 * B + 
0x9c9c85ea63fe2e89, + 0xbe1baf910e9412ec, + 0x8f7baa8a86fbfe7b, + 0x0fb17f9fef968b6c, + 0x59a16676706ff64e, + 0x10b953dd0d86a53d, + 0x5848e1e6ce5c0b96, + 0x2d8b78e712780c68, + 0x79d5c62eafc3902b, + 0x773a215289e80728, + 0xc38ae640e10120b9, + 0x09ae23717b2b1a6d, + // 2^148 * 5 * B + 0xbb6a192a4e4d083c, + 0x34ace0630029e192, + 0x98245a59aafabaeb, + 0x6d9c8a9ada97faac, + 0x10ab8fa1ad32b1d0, + 0xe9aced1be2778b24, + 0xa8856bc0373de90f, + 0x66f35ddddda53996, + 0xd27d9afb24997323, + 0x1bb7e07ef6f01d2e, + 0x2ba7472df52ecc7f, + 0x03019b4f646f9dc8, + // 2^148 * 6 * B + 0x04a186b5565345cd, + 0xeee76610bcc4116a, + 0x689c73b478fb2a45, + 0x387dcbff65697512, + 0xaf09b214e6b3dc6b, + 0x3f7573b5ad7d2f65, + 0xd019d988100a23b0, + 0x392b63a58b5c35f7, + 0x4093addc9c07c205, + 0xc565be15f532c37e, + 0x63dbecfd1583402a, + 0x61722b4aef2e032e, + // 2^148 * 7 * B + 0x0012aafeecbd47af, + 0x55a266fb1cd46309, + 0xf203eb680967c72c, + 0x39633944ca3c1429, + 0xd6b07a5581cb0e3c, + 0x290ff006d9444969, + 0x08680b6a16dcda1f, + 0x5568d2b75a06de59, + 0x8d0cb88c1b37cfe1, + 0x05b6a5a3053818f3, + 0xf2e9bc04b787d959, + 0x6beba1249add7f64, + // 2^148 * 8 * B + 0x1d06005ca5b1b143, + 0x6d4c6bb87fd1cda2, + 0x6ef5967653fcffe7, + 0x097c29e8c1ce1ea5, + 0x5c3cecb943f5a53b, + 0x9cc9a61d06c08df2, + 0xcfba639a85895447, + 0x5a845ae80df09fd5, + 0x4ce97dbe5deb94ca, + 0x38d0a4388c709c48, + 0xc43eced4a169d097, + 0x0a1249fff7e587c3, + // 2^152 * 1 * B + 0x12f0071b276d01c9, + 0xe7b8bac586c48c70, + 0x5308129b71d6fba9, + 0x5d88fbf95a3db792, + 0x0b408d9e7354b610, + 0x806b32535ba85b6e, + 0xdbe63a034a58a207, + 0x173bd9ddc9a1df2c, + 0x2b500f1efe5872df, + 0x58d6582ed43918c1, + 0xe6ed278ec9673ae0, + 0x06e1cd13b19ea319, + // 2^152 * 2 * B + 0x40d0ad516f166f23, + 0x118e32931fab6abe, + 0x3fe35e14a04d088e, + 0x3080603526e16266, + 0x472baf629e5b0353, + 0x3baa0b90278d0447, + 0x0c785f469643bf27, + 0x7f3a6a1a8d837b13, + 0xf7e644395d3d800b, + 0x95a8d555c901edf6, + 0x68cd7830592c6339, + 0x30d0fded2e51307e, + // 2^152 * 3 * B + 
0xe0594d1af21233b3, + 0x1bdbe78ef0cc4d9c, + 0x6965187f8f499a77, + 0x0a9214202c099868, + 0x9cb4971e68b84750, + 0xa09572296664bbcf, + 0x5c8de72672fa412b, + 0x4615084351c589d9, + 0xbc9019c0aeb9a02e, + 0x55c7110d16034cae, + 0x0e6df501659932ec, + 0x3bca0d2895ca5dfe, + // 2^152 * 4 * B + 0x40f031bc3c5d62a4, + 0x19fc8b3ecff07a60, + 0x98183da2130fb545, + 0x5631deddae8f13cd, + 0x9c688eb69ecc01bf, + 0xf0bc83ada644896f, + 0xca2d955f5f7a9fe2, + 0x4ea8b4038df28241, + 0x2aed460af1cad202, + 0x46305305a48cee83, + 0x9121774549f11a5f, + 0x24ce0930542ca463, + // 2^152 * 5 * B + 0x1fe890f5fd06c106, + 0xb5c468355d8810f2, + 0x827808fe6e8caf3e, + 0x41d4e3c28a06d74b, + 0x3fcfa155fdf30b85, + 0xd2f7168e36372ea4, + 0xb2e064de6492f844, + 0x549928a7324f4280, + 0xf26e32a763ee1a2e, + 0xae91e4b7d25ffdea, + 0xbc3bd33bd17f4d69, + 0x491b66dec0dcff6a, + // 2^152 * 6 * B + 0x98f5b13dc7ea32a7, + 0xe3d5f8cc7e16db98, + 0xac0abf52cbf8d947, + 0x08f338d0c85ee4ac, + 0x75f04a8ed0da64a1, + 0xed222caf67e2284b, + 0x8234a3791f7b7ba4, + 0x4cf6b8b0b7018b67, + 0xc383a821991a73bd, + 0xab27bc01df320c7a, + 0xc13d331b84777063, + 0x530d4a82eb078a99, + // 2^152 * 7 * B + 0x004c3630e1f94825, + 0x7e2d78268cab535a, + 0xc7482323cc84ff8b, + 0x65ea753f101770b9, + 0x6d6973456c9abf9e, + 0x257fb2fc4900a880, + 0x2bacf412c8cfb850, + 0x0db3e7e00cbfbd5b, + 0x3d66fc3ee2096363, + 0x81d62c7f61b5cb6b, + 0x0fbe044213443b1a, + 0x02a4ec1921e1a1db, + // 2^152 * 8 * B + 0x5ce6259a3b24b8a2, + 0xb8577acc45afa0b8, + 0xcccbe6e88ba07037, + 0x3d143c51127809bf, + 0xf5c86162f1cf795f, + 0x118c861926ee57f2, + 0x172124851c063578, + 0x36d12b5dec067fcf, + 0x126d279179154557, + 0xd5e48f5cfc783a0a, + 0x36bdb6e8df179bac, + 0x2ef517885ba82859, + // 2^156 * 1 * B + 0x88bd438cd11e0d4a, + 0x30cb610d43ccf308, + 0xe09a0e3791937bcc, + 0x4559135b25b1720c, + 0x1ea436837c6da1e9, + 0xf9c189af1fb9bdbe, + 0x303001fcce5dd155, + 0x28a7c99ebc57be52, + 0xb8fd9399e8d19e9d, + 0x908191cb962423ff, + 0xb2b948d747c742a3, + 0x37f33226d7fb44c4, + // 2^156 * 2 * B + 
0x0dae8767b55f6e08, + 0x4a43b3b35b203a02, + 0xe3725a6e80af8c79, + 0x0f7a7fd1705fa7a3, + 0x33912553c821b11d, + 0x66ed42c241e301df, + 0x066fcc11104222fd, + 0x307a3b41c192168f, + 0x8eeb5d076eb55ce0, + 0x2fc536bfaa0d925a, + 0xbe81830fdcb6c6e8, + 0x556c7045827baf52, + // 2^156 * 3 * B + 0x8e2b517302e9d8b7, + 0xe3e52269248714e8, + 0xbd4fbd774ca960b5, + 0x6f4b4199c5ecada9, + 0xb94b90022bf44406, + 0xabd4237eff90b534, + 0x7600a960faf86d3a, + 0x2f45abdac2322ee3, + 0x61af4912c8ef8a6a, + 0xe58fa4fe43fb6e5e, + 0xb5afcc5d6fd427cf, + 0x6a5393281e1e11eb, + // 2^156 * 4 * B + 0xf3da5139a5d1ee89, + 0x8145457cff936988, + 0x3f622fed00e188c4, + 0x0f513815db8b5a3d, + 0x0fff04fe149443cf, + 0x53cac6d9865cddd7, + 0x31385b03531ed1b7, + 0x5846a27cacd1039d, + 0x4ff5cdac1eb08717, + 0x67e8b29590f2e9bc, + 0x44093b5e237afa99, + 0x0d414bed8708b8b2, + // 2^156 * 5 * B + 0xcfb68265fd0e75f6, + 0xe45b3e28bb90e707, + 0x7242a8de9ff92c7a, + 0x685b3201933202dd, + 0x81886a92294ac9e8, + 0x23162b45d55547be, + 0x94cfbc4403715983, + 0x50eb8fdb134bc401, + 0xc0b73ec6d6b330cd, + 0x84e44807132faff1, + 0x732b7352c4a5dee1, + 0x5d7c7cf1aa7cd2d2, + // 2^156 * 6 * B + 0xaf3b46bf7a4aafa2, + 0xb78705ec4d40d411, + 0x114f0c6aca7c15e3, + 0x3f364faaa9489d4d, + 0x33d1013e9b73a562, + 0x925cef5748ec26e1, + 0xa7fce614dd468058, + 0x78b0fad41e9aa438, + 0xbf56a431ed05b488, + 0xa533e66c9c495c7e, + 0xe8652baf87f3651a, + 0x0241800059d66c33, + // 2^156 * 7 * B + 0xceb077fea37a5be4, + 0xdb642f02e5a5eeb7, + 0xc2e6d0c5471270b8, + 0x4771b65538e4529c, + 0x28350c7dcf38ea01, + 0x7c6cdbc0b2917ab6, + 0xace7cfbe857082f7, + 0x4d2845aba2d9a1e0, + 0xbb537fe0447070de, + 0xcba744436dd557df, + 0xd3b5a3473600dbcb, + 0x4aeabbe6f9ffd7f8, + // 2^156 * 8 * B + 0x4630119e40d8f78c, + 0xa01a9bc53c710e11, + 0x486d2b258910dd79, + 0x1e6c47b3db0324e5, + 0x6a2134bcc4a9c8f2, + 0xfbf8fd1c8ace2e37, + 0x000ae3049911a0ba, + 0x046e3a616bc89b9e, + 0x14e65442f03906be, + 0x4a019d54e362be2a, + 0x68ccdfec8dc230c7, + 0x7cfb7e3faf6b861c, + // 2^160 * 1 * B + 
0x4637974e8c58aedc, + 0xb9ef22fbabf041a4, + 0xe185d956e980718a, + 0x2f1b78fab143a8a6, + 0x96eebffb305b2f51, + 0xd3f938ad889596b8, + 0xf0f52dc746d5dd25, + 0x57968290bb3a0095, + 0xf71ab8430a20e101, + 0xf393658d24f0ec47, + 0xcf7509a86ee2eed1, + 0x7dc43e35dc2aa3e1, + // 2^160 * 2 * B + 0x85966665887dd9c3, + 0xc90f9b314bb05355, + 0xc6e08df8ef2079b1, + 0x7ef72016758cc12f, + 0x5a782a5c273e9718, + 0x3576c6995e4efd94, + 0x0f2ed8051f237d3e, + 0x044fb81d82d50a99, + 0xc1df18c5a907e3d9, + 0x57b3371dce4c6359, + 0xca704534b201bb49, + 0x7f79823f9c30dd2e, + // 2^160 * 3 * B + 0x8334d239a3b513e8, + 0xc13670d4b91fa8d8, + 0x12b54136f590bd33, + 0x0a4e0373d784d9b4, + 0x6a9c1ff068f587ba, + 0x0827894e0050c8de, + 0x3cbf99557ded5be7, + 0x64a9b0431c06d6f0, + 0x2eb3d6a15b7d2919, + 0xb0b4f6a0d53a8235, + 0x7156ce4389a45d47, + 0x071a7d0ace18346c, + // 2^160 * 4 * B + 0xd3072daac887ba0b, + 0x01262905bfa562ee, + 0xcf543002c0ef768b, + 0x2c3bcc7146ea7e9c, + 0xcc0c355220e14431, + 0x0d65950709b15141, + 0x9af5621b209d5f36, + 0x7c69bcf7617755d3, + 0x07f0d7eb04e8295f, + 0x10db18252f50f37d, + 0xe951a9a3171798d7, + 0x6f5a9a7322aca51d, + // 2^160 * 5 * B + 0x8ba1000c2f41c6c5, + 0xc49f79c10cfefb9b, + 0x4efa47703cc51c9f, + 0x494e21a2e147afca, + 0xe729d4eba3d944be, + 0x8d9e09408078af9e, + 0x4525567a47869c03, + 0x02ab9680ee8d3b24, + 0xefa48a85dde50d9a, + 0x219a224e0fb9a249, + 0xfa091f1dd91ef6d9, + 0x6b5d76cbea46bb34, + // 2^160 * 6 * B + 0x8857556cec0cd994, + 0x6472dc6f5cd01dba, + 0xaf0169148f42b477, + 0x0ae333f685277354, + 0xe0f941171e782522, + 0xf1e6ae74036936d3, + 0x408b3ea2d0fcc746, + 0x16fb869c03dd313e, + 0x288e199733b60962, + 0x24fc72b4d8abe133, + 0x4811f7ed0991d03e, + 0x3f81e38b8f70d075, + // 2^160 * 7 * B + 0x7f910fcc7ed9affe, + 0x545cb8a12465874b, + 0xa8397ed24b0c4704, + 0x50510fc104f50993, + 0x0adb7f355f17c824, + 0x74b923c3d74299a4, + 0xd57c3e8bcbf8eaf7, + 0x0ad3e2d34cdedc3d, + 0x6f0c0fc5336e249d, + 0x745ede19c331cfd9, + 0xf2d6fd0009eefe1c, + 0x127c158bf0fa1ebe, + // 2^160 * 8 * B + 
0xf6197c422e9879a2, + 0xa44addd452ca3647, + 0x9b413fc14b4eaccb, + 0x354ef87d07ef4f68, + 0xdea28fc4ae51b974, + 0x1d9973d3744dfe96, + 0x6240680b873848a8, + 0x4ed82479d167df95, + 0xfee3b52260c5d975, + 0x50352efceb41b0b8, + 0x8808ac30a9f6653c, + 0x302d92d20539236d, + // 2^164 * 1 * B + 0x4c59023fcb3efb7c, + 0x6c2fcb99c63c2a94, + 0xba4190e2c3c7e084, + 0x0e545daea51874d9, + 0x957b8b8b0df53c30, + 0x2a1c770a8e60f098, + 0xbbc7a670345796de, + 0x22a48f9a90c99bc9, + 0x6b7dc0dc8d3fac58, + 0x5497cd6ce6e42bfd, + 0x542f7d1bf400d305, + 0x4159f47f048d9136, + // 2^164 * 2 * B + 0x20ad660839e31e32, + 0xf81e1bd58405be50, + 0xf8064056f4dabc69, + 0x14d23dd4ce71b975, + 0x748515a8bbd24839, + 0x77128347afb02b55, + 0x50ba2ac649a2a17f, + 0x060525513ad730f1, + 0xf2398e098aa27f82, + 0x6d7982bb89a1b024, + 0xfa694084214dd24c, + 0x71ab966fa32301c3, + // 2^164 * 3 * B + 0x2dcbd8e34ded02fc, + 0x1151f3ec596f22aa, + 0xbca255434e0328da, + 0x35768fbe92411b22, + 0xb1088a0702809955, + 0x43b273ea0b43c391, + 0xca9b67aefe0686ed, + 0x605eecbf8335f4ed, + 0x83200a656c340431, + 0x9fcd71678ee59c2f, + 0x75d4613f71300f8a, + 0x7a912faf60f542f9, + // 2^164 * 4 * B + 0xb204585e5edc1a43, + 0x9f0e16ee5897c73c, + 0x5b82c0ae4e70483c, + 0x624a170e2bddf9be, + 0x253f4f8dfa2d5597, + 0x25e49c405477130c, + 0x00c052e5996b1102, + 0x33cb966e33bb6c4a, + 0x597028047f116909, + 0x828ac41c1e564467, + 0x70417dbde6217387, + 0x721627aefbac4384, + // 2^164 * 5 * B + 0x97d03bc38736add5, + 0x2f1422afc532b130, + 0x3aa68a057101bbc4, + 0x4c946cf7e74f9fa7, + 0xfd3097bc410b2f22, + 0xf1a05da7b5cfa844, + 0x61289a1def57ca74, + 0x245ea199bb821902, + 0xaedca66978d477f8, + 0x1898ba3c29117fe1, + 0xcf73f983720cbd58, + 0x67da12e6b8b56351, + // 2^164 * 6 * B + 0x7067e187b4bd6e07, + 0x6e8f0203c7d1fe74, + 0x93c6aa2f38c85a30, + 0x76297d1f3d75a78a, + 0x2b7ef3d38ec8308c, + 0x828fd7ec71eb94ab, + 0x807c3b36c5062abd, + 0x0cb64cb831a94141, + 0x3030fc33534c6378, + 0xb9635c5ce541e861, + 0x15d9a9bed9b2c728, + 0x49233ea3f3775dcb, + // 2^164 * 7 * B + 
0x629398fa8dbffc3a, + 0xe12fe52dd54db455, + 0xf3be11dfdaf25295, + 0x628b140dce5e7b51, + 0x7b3985fe1c9f249b, + 0x4fd6b2d5a1233293, + 0xceb345941adf4d62, + 0x6987ff6f542de50c, + 0x47e241428f83753c, + 0x6317bebc866af997, + 0xdabb5b433d1a9829, + 0x074d8d245287fb2d, + // 2^164 * 8 * B + 0x8337d9cd440bfc31, + 0x729d2ca1af318fd7, + 0xa040a4a4772c2070, + 0x46002ef03a7349be, + 0x481875c6c0e31488, + 0x219429b2e22034b4, + 0x7223c98a31283b65, + 0x3420d60b342277f9, + 0xfaa23adeaffe65f7, + 0x78261ed45be0764c, + 0x441c0a1e2f164403, + 0x5aea8e567a87d395, + // 2^168 * 1 * B + 0x7813c1a2bca4283d, + 0xed62f091a1863dd9, + 0xaec7bcb8c268fa86, + 0x10e5d3b76f1cae4c, + 0x2dbc6fb6e4e0f177, + 0x04e1bf29a4bd6a93, + 0x5e1966d4787af6e8, + 0x0edc5f5eb426d060, + 0x5453bfd653da8e67, + 0xe9dc1eec24a9f641, + 0xbf87263b03578a23, + 0x45b46c51361cba72, + // 2^168 * 2 * B + 0xa9402abf314f7fa1, + 0xe257f1dc8e8cf450, + 0x1dbbd54b23a8be84, + 0x2177bfa36dcb713b, + 0xce9d4ddd8a7fe3e4, + 0xab13645676620e30, + 0x4b594f7bb30e9958, + 0x5c1c0aef321229df, + 0x37081bbcfa79db8f, + 0x6048811ec25f59b3, + 0x087a76659c832487, + 0x4ae619387d8ab5bb, + // 2^168 * 3 * B + 0x8ddbf6aa5344a32e, + 0x7d88eab4b41b4078, + 0x5eb0eb974a130d60, + 0x1a00d91b17bf3e03, + 0x61117e44985bfb83, + 0xfce0462a71963136, + 0x83ac3448d425904b, + 0x75685abe5ba43d64, + 0x6e960933eb61f2b2, + 0x543d0fa8c9ff4952, + 0xdf7275107af66569, + 0x135529b623b0e6aa, + // 2^168 * 4 * B + 0x18f0dbd7add1d518, + 0x979f7888cfc11f11, + 0x8732e1f07114759b, + 0x79b5b81a65ca3a01, + 0xf5c716bce22e83fe, + 0xb42beb19e80985c1, + 0xec9da63714254aae, + 0x5972ea051590a613, + 0x0fd4ac20dc8f7811, + 0x9a9ad294ac4d4fa8, + 0xc01b2d64b3360434, + 0x4f7e9c95905f3bdb, + // 2^168 * 5 * B + 0x62674bbc5781302e, + 0xd8520f3989addc0f, + 0x8c2999ae53fbd9c6, + 0x31993ad92e638e4c, + 0x71c8443d355299fe, + 0x8bcd3b1cdbebead7, + 0x8092499ef1a49466, + 0x1942eec4a144adc8, + 0x7dac5319ae234992, + 0x2c1b3d910cea3e92, + 0x553ce494253c1122, + 0x2a0a65314ef9ca75, + // 2^168 * 6 * B + 
0x2db7937ff7f927c2, + 0xdb741f0617d0a635, + 0x5982f3a21155af76, + 0x4cf6e218647c2ded, + 0xcf361acd3c1c793a, + 0x2f9ebcac5a35bc3b, + 0x60e860e9a8cda6ab, + 0x055dc39b6dea1a13, + 0xb119227cc28d5bb6, + 0x07e24ebc774dffab, + 0xa83c78cee4a32c89, + 0x121a307710aa24b6, + // 2^168 * 7 * B + 0xe4db5d5e9f034a97, + 0xe153fc093034bc2d, + 0x460546919551d3b1, + 0x333fc76c7a40e52d, + 0xd659713ec77483c9, + 0x88bfe077b82b96af, + 0x289e28231097bcd3, + 0x527bb94a6ced3a9b, + 0x563d992a995b482e, + 0x3405d07c6e383801, + 0x485035de2f64d8e5, + 0x6b89069b20a7a9f7, + // 2^168 * 8 * B + 0x812aa0416270220d, + 0x995a89faf9245b4e, + 0xffadc4ce5072ef05, + 0x23bc2103aa73eb73, + 0x4082fa8cb5c7db77, + 0x068686f8c734c155, + 0x29e6c8d9f6e7a57e, + 0x0473d308a7639bcf, + 0xcaee792603589e05, + 0x2b4b421246dcc492, + 0x02a1ef74e601a94f, + 0x102f73bfde04341a, + // 2^172 * 1 * B + 0xb5a2d50c7ec20d3e, + 0xc64bdd6ea0c97263, + 0x56e89052c1ff734d, + 0x4929c6f72b2ffaba, + 0x358ecba293a36247, + 0xaf8f9862b268fd65, + 0x412f7e9968a01c89, + 0x5786f312cd754524, + 0x337788ffca14032c, + 0xf3921028447f1ee3, + 0x8b14071f231bccad, + 0x4c817b4bf2344783, + // 2^172 * 2 * B + 0x0ff853852871b96e, + 0xe13e9fab60c3f1bb, + 0xeefd595325344402, + 0x0a37c37075b7744b, + 0x413ba057a40b4484, + 0xba4c2e1a4f5f6a43, + 0x614ba0a5aee1d61c, + 0x78a1531a8b05dc53, + 0x6cbdf1703ad0562b, + 0x8ecf4830c92521a3, + 0xdaebd303fd8424e7, + 0x72ad82a42e5ec56f, + // 2^172 * 3 * B + 0x3f9e8e35bafb65f6, + 0x39d69ec8f27293a1, + 0x6cb8cd958cf6a3d0, + 0x1734778173adae6d, + 0xc368939167024bc3, + 0x8e69d16d49502fda, + 0xfcf2ec3ce45f4b29, + 0x065f669ea3b4cbc4, + 0x8a00aec75532db4d, + 0xb869a4e443e31bb1, + 0x4a0f8552d3a7f515, + 0x19adeb7c303d7c08, + // 2^172 * 4 * B + 0xc720cb6153ead9a3, + 0x55b2c97f512b636e, + 0xb1e35b5fd40290b1, + 0x2fd9ccf13b530ee2, + 0x9d05ba7d43c31794, + 0x2470c8ff93322526, + 0x8323dec816197438, + 0x2852709881569b53, + 0x07bd475b47f796b8, + 0xd2c7b013542c8f54, + 0x2dbd23f43b24f87e, + 0x6551afd77b0901d6, + // 2^172 * 5 * B + 
0x4546baaf54aac27f, + 0xf6f66fecb2a45a28, + 0x582d1b5b562bcfe8, + 0x44b123f3920f785f, + 0x68a24ce3a1d5c9ac, + 0xbb77a33d10ff6461, + 0x0f86ce4425d3166e, + 0x56507c0950b9623b, + 0x1206f0b7d1713e63, + 0x353fe3d915bafc74, + 0x194ceb970ad9d94d, + 0x62fadd7cf9d03ad3, + // 2^172 * 6 * B + 0xc6b5967b5598a074, + 0x5efe91ce8e493e25, + 0xd4b72c4549280888, + 0x20ef1149a26740c2, + 0x3cd7bc61e7ce4594, + 0xcd6b35a9b7dd267e, + 0xa080abc84366ef27, + 0x6ec7c46f59c79711, + 0x2f07ad636f09a8a2, + 0x8697e6ce24205e7d, + 0xc0aefc05ee35a139, + 0x15e80958b5f9d897, + // 2^172 * 7 * B + 0x25a5ef7d0c3e235b, + 0x6c39c17fbe134ee7, + 0xc774e1342dc5c327, + 0x021354b892021f39, + 0x4dd1ed355bb061c4, + 0x42dc0cef941c0700, + 0x61305dc1fd86340e, + 0x56b2cc930e55a443, + 0x1df79da6a6bfc5a2, + 0x02f3a2749fde4369, + 0xb323d9f2cda390a7, + 0x7be0847b8774d363, + // 2^172 * 8 * B + 0x8c99cc5a8b3f55c3, + 0x0611d7253fded2a0, + 0xed2995ff36b70a36, + 0x1f699a54d78a2619, + 0x1466f5af5307fa11, + 0x817fcc7ded6c0af2, + 0x0a6de44ec3a4a3fb, + 0x74071475bc927d0b, + 0xe77292f373e7ea8a, + 0x296537d2cb045a31, + 0x1bd0653ed3274fde, + 0x2f9a2c4476bd2966, + // 2^176 * 1 * B + 0xeb18b9ab7f5745c6, + 0x023a8aee5787c690, + 0xb72712da2df7afa9, + 0x36597d25ea5c013d, + 0xa2b4dae0b5511c9a, + 0x7ac860292bffff06, + 0x981f375df5504234, + 0x3f6bd725da4ea12d, + 0x734d8d7b106058ac, + 0xd940579e6fc6905f, + 0x6466f8f99202932d, + 0x7b7ecc19da60d6d0, + // 2^176 * 2 * B + 0x78c2373c695c690d, + 0xdd252e660642906e, + 0x951d44444ae12bd2, + 0x4235ad7601743956, + 0x6dae4a51a77cfa9b, + 0x82263654e7a38650, + 0x09bbffcd8f2d82db, + 0x03bedc661bf5caba, + 0x6258cb0d078975f5, + 0x492942549189f298, + 0xa0cab423e2e36ee4, + 0x0e7ce2b0cdf066a1, + // 2^176 * 3 * B + 0xc494643ac48c85a3, + 0xfd361df43c6139ad, + 0x09db17dd3ae94d48, + 0x666e0a5d8fb4674a, + 0xfea6fedfd94b70f9, + 0xf130c051c1fcba2d, + 0x4882d47e7f2fab89, + 0x615256138aeceeb5, + 0x2abbf64e4870cb0d, + 0xcd65bcf0aa458b6b, + 0x9abe4eba75e8985d, + 0x7f0bc810d514dee4, + // 2^176 * 4 * B + 
0xb9006ba426f4136f, + 0x8d67369e57e03035, + 0xcbc8dfd94f463c28, + 0x0d1f8dbcf8eedbf5, + 0x83ac9dad737213a0, + 0x9ff6f8ba2ef72e98, + 0x311e2edd43ec6957, + 0x1d3a907ddec5ab75, + 0xba1693313ed081dc, + 0x29329fad851b3480, + 0x0128013c030321cb, + 0x00011b44a31bfde3, + // 2^176 * 5 * B + 0x3fdfa06c3fc66c0c, + 0x5d40e38e4dd60dd2, + 0x7ae38b38268e4d71, + 0x3ac48d916e8357e1, + 0x16561f696a0aa75c, + 0xc1bf725c5852bd6a, + 0x11a8dd7f9a7966ad, + 0x63d988a2d2851026, + 0x00120753afbd232e, + 0xe92bceb8fdd8f683, + 0xf81669b384e72b91, + 0x33fad52b2368a066, + // 2^176 * 6 * B + 0x540649c6c5e41e16, + 0x0af86430333f7735, + 0xb2acfcd2f305e746, + 0x16c0f429a256dca7, + 0x8d2cc8d0c422cfe8, + 0x072b4f7b05a13acb, + 0xa3feb6e6ecf6a56f, + 0x3cc355ccb90a71e2, + 0xe9b69443903e9131, + 0xb8a494cb7a5637ce, + 0xc87cd1a4baba9244, + 0x631eaf426bae7568, + // 2^176 * 7 * B + 0xb3e90410da66fe9f, + 0x85dd4b526c16e5a6, + 0xbc3d97611ef9bf83, + 0x5599648b1ea919b5, + 0x47d975b9a3700de8, + 0x7280c5fbe2f80552, + 0x53658f2732e45de1, + 0x431f2c7f665f80b5, + 0xd6026344858f7b19, + 0x14ab352fa1ea514a, + 0x8900441a2090a9d7, + 0x7b04715f91253b26, + // 2^176 * 8 * B + 0x83edbd28acf6ae43, + 0x86357c8b7d5c7ab4, + 0xc0404769b7eb2c44, + 0x59b37bf5c2f6583f, + 0xb376c280c4e6bac6, + 0x970ed3dd6d1d9b0b, + 0xb09a9558450bf944, + 0x48d0acfa57cde223, + 0xb60f26e47dabe671, + 0xf1d1a197622f3a37, + 0x4208ce7ee9960394, + 0x16234191336d3bdb, + // 2^180 * 1 * B + 0xf19aeac733a63aef, + 0x2c7fba5d4442454e, + 0x5da87aa04795e441, + 0x413051e1a4e0b0f5, + 0x852dd1fd3d578bbe, + 0x2b65ce72c3286108, + 0x658c07f4eace2273, + 0x0933f804ec38ab40, + 0xa7ab69798d496476, + 0x8121aadefcb5abc8, + 0xa5dc12ef7b539472, + 0x07fd47065e45351a, + // 2^180 * 2 * B + 0xc8583c3d258d2bcd, + 0x17029a4daf60b73f, + 0xfa0fc9d6416a3781, + 0x1c1e5fba38b3fb23, + 0x304211559ae8e7c3, + 0xf281b229944882a5, + 0x8a13ac2e378250e4, + 0x014afa0954ba48f4, + 0xcb3197001bb3666c, + 0x330060524bffecb9, + 0x293711991a88233c, + 0x291884363d4ed364, + // 2^180 * 3 * B + 
0x033c6805dc4babfa, + 0x2c15bf5e5596ecc1, + 0x1bc70624b59b1d3b, + 0x3ede9850a19f0ec5, + 0xfb9d37c3bc1ab6eb, + 0x02be14534d57a240, + 0xf4d73415f8a5e1f6, + 0x5964f4300ccc8188, + 0xe44a23152d096800, + 0x5c08c55970866996, + 0xdf2db60a46affb6e, + 0x579155c1f856fd89, + // 2^180 * 4 * B + 0x96324edd12e0c9ef, + 0x468b878df2420297, + 0x199a3776a4f573be, + 0x1e7fbcf18e91e92a, + 0xb5f16b630817e7a6, + 0x808c69233c351026, + 0x324a983b54cef201, + 0x53c092084a485345, + 0xd2d41481f1cbafbf, + 0x231d2db6716174e5, + 0x0b7d7656e2a55c98, + 0x3e955cd82aa495f6, + // 2^180 * 5 * B + 0xe48f535e3ed15433, + 0xd075692a0d7270a3, + 0x40fbd21daade6387, + 0x14264887cf4495f5, + 0xab39f3ef61bb3a3f, + 0x8eb400652eb9193e, + 0xb5de6ecc38c11f74, + 0x654d7e9626f3c49f, + 0xe564cfdd5c7d2ceb, + 0x82eeafded737ccb9, + 0x6107db62d1f9b0ab, + 0x0b6baac3b4358dbb, + // 2^180 * 6 * B + 0x7ae62bcb8622fe98, + 0x47762256ceb891af, + 0x1a5a92bcf2e406b4, + 0x7d29401784e41501, + 0x204abad63700a93b, + 0xbe0023d3da779373, + 0xd85f0346633ab709, + 0x00496dc490820412, + 0x1c74b88dc27e6360, + 0x074854268d14850c, + 0xa145fb7b3e0dcb30, + 0x10843f1b43803b23, + // 2^180 * 7 * B + 0xc5f90455376276dd, + 0xce59158dd7645cd9, + 0x92f65d511d366b39, + 0x11574b6e526996c4, + 0xd56f672de324689b, + 0xd1da8aedb394a981, + 0xdd7b58fe9168cfed, + 0x7ce246cd4d56c1e8, + 0xb8f4308e7f80be53, + 0x5f3cb8cb34a9d397, + 0x18a961bd33cc2b2c, + 0x710045fb3a9af671, + // 2^180 * 8 * B + 0x73f93d36101b95eb, + 0xfaef33794f6f4486, + 0x5651735f8f15e562, + 0x7fa3f19058b40da1, + 0xa03fc862059d699e, + 0x2370cfa19a619e69, + 0xc4fe3b122f823deb, + 0x1d1b056fa7f0844e, + 0x1bc64631e56bf61f, + 0xd379ab106e5382a3, + 0x4d58c57e0540168d, + 0x566256628442d8e4, + // 2^184 * 1 * B + 0xb9e499def6267ff6, + 0x7772ca7b742c0843, + 0x23a0153fe9a4f2b1, + 0x2cdfdfecd5d05006, + 0xdd499cd61ff38640, + 0x29cd9bc3063625a0, + 0x51e2d8023dd73dc3, + 0x4a25707a203b9231, + 0x2ab7668a53f6ed6a, + 0x304242581dd170a1, + 0x4000144c3ae20161, + 0x5721896d248e49fc, + // 2^184 * 2 * B + 
0x0b6e5517fd181bae, + 0x9022629f2bb963b4, + 0x5509bce932064625, + 0x578edd74f63c13da, + 0x285d5091a1d0da4e, + 0x4baa6fa7b5fe3e08, + 0x63e5177ce19393b3, + 0x03c935afc4b030fd, + 0x997276c6492b0c3d, + 0x47ccc2c4dfe205fc, + 0xdcd29b84dd623a3c, + 0x3ec2ab590288c7a2, + // 2^184 * 3 * B + 0xa1a0d27be4d87bb9, + 0xa98b4deb61391aed, + 0x99a0ddd073cb9b83, + 0x2dd5c25a200fcace, + 0xa7213a09ae32d1cb, + 0x0f2b87df40f5c2d5, + 0x0baea4c6e81eab29, + 0x0e1bf66c6adbac5e, + 0xe2abd5e9792c887e, + 0x1a020018cb926d5d, + 0xbfba69cdbaae5f1e, + 0x730548b35ae88f5f, + // 2^184 * 4 * B + 0xc43551a3cba8b8ee, + 0x65a26f1db2115f16, + 0x760f4f52ab8c3850, + 0x3043443b411db8ca, + 0x805b094ba1d6e334, + 0xbf3ef17709353f19, + 0x423f06cb0622702b, + 0x585a2277d87845dd, + 0xa18a5f8233d48962, + 0x6698c4b5ec78257f, + 0xa78e6fa5373e41ff, + 0x7656278950ef981f, + // 2^184 * 5 * B + 0x38c3cf59d51fc8c0, + 0x9bedd2fd0506b6f2, + 0x26bf109fab570e8f, + 0x3f4160a8c1b846a6, + 0xe17073a3ea86cf9d, + 0x3a8cfbb707155fdc, + 0x4853e7fc31838a8e, + 0x28bbf484b613f616, + 0xf2612f5c6f136c7c, + 0xafead107f6dd11be, + 0x527e9ad213de6f33, + 0x1e79cb358188f75d, + // 2^184 * 6 * B + 0x013436c3eef7e3f1, + 0x828b6a7ffe9e10f8, + 0x7ff908e5bcf9defc, + 0x65d7951b3a3b3831, + 0x77e953d8f5e08181, + 0x84a50c44299dded9, + 0xdc6c2d0c864525e5, + 0x478ab52d39d1f2f4, + 0x66a6a4d39252d159, + 0xe5dde1bc871ac807, + 0xb82c6b40a6c1c96f, + 0x16d87a411a212214, + // 2^184 * 7 * B + 0xb3bd7e5a42066215, + 0x879be3cd0c5a24c1, + 0x57c05db1d6f994b7, + 0x28f87c8165f38ca6, + 0xfba4d5e2d54e0583, + 0xe21fafd72ebd99fa, + 0x497ac2736ee9778f, + 0x1f990b577a5a6dde, + 0xa3344ead1be8f7d6, + 0x7d1e50ebacea798f, + 0x77c6569e520de052, + 0x45882fe1534d6d3e, + // 2^184 * 8 * B + 0x6669345d757983d6, + 0x62b6ed1117aa11a6, + 0x7ddd1857985e128f, + 0x688fe5b8f626f6dd, + 0xd8ac9929943c6fe4, + 0xb5f9f161a38392a2, + 0x2699db13bec89af3, + 0x7dcf843ce405f074, + 0x6c90d6484a4732c0, + 0xd52143fdca563299, + 0xb3be28c3915dc6e1, + 0x6739687e7327191b, + // 2^188 * 1 * B + 
0x9f65c5ea200814cf, + 0x840536e169a31740, + 0x8b0ed13925c8b4ad, + 0x0080dbafe936361d, + 0x8ce5aad0c9cb971f, + 0x1156aaa99fd54a29, + 0x41f7247015af9b78, + 0x1fe8cca8420f49aa, + 0x72a1848f3c0cc82a, + 0x38c560c2877c9e54, + 0x5004e228ce554140, + 0x042418a103429d71, + // 2^188 * 2 * B + 0x899dea51abf3ff5f, + 0x9b93a8672fc2d8ba, + 0x2c38cb97be6ebd5c, + 0x114d578497263b5d, + 0x58e84c6f20816247, + 0x8db2b2b6e36fd793, + 0x977182561d484d85, + 0x0822024f8632abd7, + 0xb301bb7c6b1beca3, + 0x55393f6dc6eb1375, + 0x910d281097b6e4eb, + 0x1ad4548d9d479ea3, + // 2^188 * 3 * B + 0xcd5a7da0389a48fd, + 0xb38fa4aa9a78371e, + 0xc6d9761b2cdb8e6c, + 0x35cf51dbc97e1443, + 0xa06fe66d0fe9fed3, + 0xa8733a401c587909, + 0x30d14d800df98953, + 0x41ce5876c7b30258, + 0x59ac3bc5d670c022, + 0xeae67c109b119406, + 0x9798bdf0b3782fda, + 0x651e3201fd074092, + // 2^188 * 4 * B + 0xd63d8483ef30c5cf, + 0x4cd4b4962361cc0c, + 0xee90e500a48426ac, + 0x0af51d7d18c14eeb, + 0xa57ba4a01efcae9e, + 0x769f4beedc308a94, + 0xd1f10eeb3603cb2e, + 0x4099ce5e7e441278, + 0x1ac98e4f8a5121e9, + 0x7dae9544dbfa2fe0, + 0x8320aa0dd6430df9, + 0x667282652c4a2fb5, + // 2^188 * 5 * B + 0x874621f4d86bc9ab, + 0xb54c7bbe56fe6fea, + 0x077a24257fadc22c, + 0x1ab53be419b90d39, + 0xada8b6e02946db23, + 0x1c0ce51a7b253ab7, + 0x8448c85a66dd485b, + 0x7f1fc025d0675adf, + 0xd8ee1b18319ea6aa, + 0x004d88083a21f0da, + 0x3bd6aa1d883a4f4b, + 0x4db9a3a6dfd9fd14, + // 2^188 * 6 * B + 0x8ce7b23bb99c0755, + 0x35c5d6edc4f50f7a, + 0x7e1e2ed2ed9b50c3, + 0x36305f16e8934da1, + 0xd95b00bbcbb77c68, + 0xddbc846a91f17849, + 0x7cf700aebe28d9b3, + 0x5ce1285c85d31f3e, + 0x31b6972d98b0bde8, + 0x7d920706aca6de5b, + 0xe67310f8908a659f, + 0x50fac2a6efdf0235, + // 2^188 * 7 * B + 0xf3d3a9f35b880f5a, + 0xedec050cdb03e7c2, + 0xa896981ff9f0b1a2, + 0x49a4ae2bac5e34a4, + 0x295b1c86f6f449bc, + 0x51b2e84a1f0ab4dd, + 0xc001cb30aa8e551d, + 0x6a28d35944f43662, + 0x28bb12ee04a740e0, + 0x14313bbd9bce8174, + 0x72f5b5e4e8c10c40, + 0x7cbfb19936adcd5b, + // 2^188 * 8 * B + 
0xa311ddc26b89792d, + 0x1b30b4c6da512664, + 0x0ca77b4ccf150859, + 0x1de443df1b009408, + 0x8e793a7acc36e6e0, + 0xf9fab7a37d586eed, + 0x3a4f9692bae1f4e4, + 0x1c14b03eff5f447e, + 0x19647bd114a85291, + 0x57b76cb21034d3af, + 0x6329db440f9d6dfa, + 0x5ef43e586a571493, + // 2^192 * 1 * B + 0xef782014385675a6, + 0xa2649f30aafda9e8, + 0x4cd1eb505cdfa8cb, + 0x46115aba1d4dc0b3, + 0xa66dcc9dc80c1ac0, + 0x97a05cf41b38a436, + 0xa7ebf3be95dbd7c6, + 0x7da0b8f68d7e7dab, + 0xd40f1953c3b5da76, + 0x1dac6f7321119e9b, + 0x03cc6021feb25960, + 0x5a5f887e83674b4b, + // 2^192 * 2 * B + 0x8f6301cf70a13d11, + 0xcfceb815350dd0c4, + 0xf70297d4a4bca47e, + 0x3669b656e44d1434, + 0x9e9628d3a0a643b9, + 0xb5c3cb00e6c32064, + 0x9b5302897c2dec32, + 0x43e37ae2d5d1c70c, + 0x387e3f06eda6e133, + 0x67301d5199a13ac0, + 0xbd5ad8f836263811, + 0x6a21e6cd4fd5e9be, + // 2^192 * 3 * B + 0xf1c6170a3046e65f, + 0x58712a2a00d23524, + 0x69dbbd3c8c82b755, + 0x586bf9f1a195ff57, + 0xef4129126699b2e3, + 0x71d30847708d1301, + 0x325432d01182b0bd, + 0x45371b07001e8b36, + 0xa6db088d5ef8790b, + 0x5278f0dc610937e5, + 0xac0349d261a16eb8, + 0x0eafb03790e52179, + // 2^192 * 4 * B + 0x960555c13748042f, + 0x219a41e6820baa11, + 0x1c81f73873486d0c, + 0x309acc675a02c661, + 0x5140805e0f75ae1d, + 0xec02fbe32662cc30, + 0x2cebdf1eea92396d, + 0x44ae3344c5435bb3, + 0x9cf289b9bba543ee, + 0xf3760e9d5ac97142, + 0x1d82e5c64f9360aa, + 0x62d5221b7f94678f, + // 2^192 * 5 * B + 0x524c299c18d0936d, + 0xc86bb56c8a0c1a0c, + 0xa375052edb4a8631, + 0x5c0efde4bc754562, + 0x7585d4263af77a3c, + 0xdfae7b11fee9144d, + 0xa506708059f7193d, + 0x14f29a5383922037, + 0xdf717edc25b2d7f5, + 0x21f970db99b53040, + 0xda9234b7c3ed4c62, + 0x5e72365c7bee093e, + // 2^192 * 6 * B + 0x575bfc074571217f, + 0x3779675d0694d95b, + 0x9a0a37bbf4191e33, + 0x77f1104c47b4eabc, + 0x7d9339062f08b33e, + 0x5b9659e5df9f32be, + 0xacff3dad1f9ebdfd, + 0x70b20555cb7349b7, + 0xbe5113c555112c4c, + 0x6688423a9a881fcd, + 0x446677855e503b47, + 0x0e34398f4a06404a, + // 2^192 * 7 * B + 
0xb67d22d93ecebde8, + 0x09b3e84127822f07, + 0x743fa61fb05b6d8d, + 0x5e5405368a362372, + 0x18930b093e4b1928, + 0x7de3e10e73f3f640, + 0xf43217da73395d6f, + 0x6f8aded6ca379c3e, + 0xe340123dfdb7b29a, + 0x487b97e1a21ab291, + 0xf9967d02fde6949e, + 0x780de72ec8d3de97, + // 2^192 * 8 * B + 0x0ae28545089ae7bc, + 0x388ddecf1c7f4d06, + 0x38ac15510a4811b8, + 0x0eb28bf671928ce4, + 0x671feaf300f42772, + 0x8f72eb2a2a8c41aa, + 0x29a17fd797373292, + 0x1defc6ad32b587a6, + 0xaf5bbe1aef5195a7, + 0x148c1277917b15ed, + 0x2991f7fb7ae5da2e, + 0x467d201bf8dd2867, + // 2^196 * 1 * B + 0x7906ee72f7bd2e6b, + 0x05d270d6109abf4e, + 0x8d5cfe45b941a8a4, + 0x44c218671c974287, + 0x745f9d56296bc318, + 0x993580d4d8152e65, + 0xb0e5b13f5839e9ce, + 0x51fc2b28d43921c0, + 0x1b8fd11795e2a98c, + 0x1c4e5ee12b6b6291, + 0x5b30e7107424b572, + 0x6e6b9de84c4f4ac6, + // 2^196 * 2 * B + 0xdff25fce4b1de151, + 0xd841c0c7e11c4025, + 0x2554b3c854749c87, + 0x2d292459908e0df9, + 0x6b7c5f10f80cb088, + 0x736b54dc56e42151, + 0xc2b620a5c6ef99c4, + 0x5f4c802cc3a06f42, + 0x9b65c8f17d0752da, + 0x881ce338c77ee800, + 0xc3b514f05b62f9e3, + 0x66ed5dd5bec10d48, + // 2^196 * 3 * B + 0x7d38a1c20bb2089d, + 0x808334e196ccd412, + 0xc4a70b8c6c97d313, + 0x2eacf8bc03007f20, + 0xf0adf3c9cbca047d, + 0x81c3b2cbf4552f6b, + 0xcfda112d44735f93, + 0x1f23a0c77e20048c, + 0xf235467be5bc1570, + 0x03d2d9020dbab38c, + 0x27529aa2fcf9e09e, + 0x0840bef29d34bc50, + // 2^196 * 4 * B + 0x796dfb35dc10b287, + 0x27176bcd5c7ff29d, + 0x7f3d43e8c7b24905, + 0x0304f5a191c54276, + 0xcd54e06b7f37e4eb, + 0x8cc15f87f5e96cca, + 0xb8248bb0d3597dce, + 0x246affa06074400c, + 0x37d88e68fbe45321, + 0x86097548c0d75032, + 0x4e9b13ef894a0d35, + 0x25a83cac5753d325, + // 2^196 * 5 * B + 0x10222f48eed8165e, + 0x623fc1234b8bcf3a, + 0x1e145c09c221e8f0, + 0x7ccfa59fca782630, + 0x9f0f66293952b6e2, + 0x33db5e0e0934267b, + 0xff45252bd609fedc, + 0x06be10f5c506e0c9, + 0x1a9615a9b62a345f, + 0x22050c564a52fecc, + 0xa7a2788528bc0dfe, + 0x5e82770a1a1ee71d, + // 2^196 * 6 * B + 
0x35425183ad896a5c, + 0xe8673afbe78d52f6, + 0x2c66f25f92a35f64, + 0x09d04f3b3b86b102, + 0xe802e80a42339c74, + 0x34175166a7fffae5, + 0x34865d1f1c408cae, + 0x2cca982c605bc5ee, + 0xfd2d5d35197dbe6e, + 0x207c2eea8be4ffa3, + 0x2613d8db325ae918, + 0x7a325d1727741d3e, + // 2^196 * 7 * B + 0xd036b9bbd16dfde2, + 0xa2055757c497a829, + 0x8e6cc966a7f12667, + 0x4d3b1a791239c180, + 0xecd27d017e2a076a, + 0xd788689f1636495e, + 0x52a61af0919233e5, + 0x2a479df17bb1ae64, + 0x9e5eee8e33db2710, + 0x189854ded6c43ca5, + 0xa41c22c592718138, + 0x27ad5538a43a5e9b, + // 2^196 * 8 * B + 0x2746dd4b15350d61, + 0xd03fcbc8ee9521b7, + 0xe86e365a138672ca, + 0x510e987f7e7d89e2, + 0xcb5a7d638e47077c, + 0x8db7536120a1c059, + 0x549e1e4d8bedfdcc, + 0x080153b7503b179d, + 0xdda69d930a3ed3e3, + 0x3d386ef1cd60a722, + 0xc817ad58bdaa4ee6, + 0x23be8d554fe7372a, + // 2^200 * 1 * B + 0x95fe919a74ef4fad, + 0x3a827becf6a308a2, + 0x964e01d309a47b01, + 0x71c43c4f5ba3c797, + 0xbc1ef4bd567ae7a9, + 0x3f624cb2d64498bd, + 0xe41064d22c1f4ec8, + 0x2ef9c5a5ba384001, + 0xb6fd6df6fa9e74cd, + 0xf18278bce4af267a, + 0x8255b3d0f1ef990e, + 0x5a758ca390c5f293, + // 2^200 * 2 * B + 0xa2b72710d9462495, + 0x3aa8c6d2d57d5003, + 0xe3d400bfa0b487ca, + 0x2dbae244b3eb72ec, + 0x8ce0918b1d61dc94, + 0x8ded36469a813066, + 0xd4e6a829afe8aad3, + 0x0a738027f639d43f, + 0x980f4a2f57ffe1cc, + 0x00670d0de1839843, + 0x105c3f4a49fb15fd, + 0x2698ca635126a69c, + // 2^200 * 3 * B + 0xe765318832b0ba78, + 0x381831f7925cff8b, + 0x08a81b91a0291fcc, + 0x1fb43dcc49caeb07, + 0x2e3d702f5e3dd90e, + 0x9e3f0918e4d25386, + 0x5e773ef6024da96a, + 0x3c004b0c4afa3332, + 0x9aa946ac06f4b82b, + 0x1ca284a5a806c4f3, + 0x3ed3265fc6cd4787, + 0x6b43fd01cd1fd217, + // 2^200 * 4 * B + 0xc7a75d4b4697c544, + 0x15fdf848df0fffbf, + 0x2868b9ebaa46785a, + 0x5a68d7105b52f714, + 0xb5c742583e760ef3, + 0x75dc52b9ee0ab990, + 0xbf1427c2072b923f, + 0x73420b2d6ff0d9f0, + 0xaf2cf6cb9e851e06, + 0x8f593913c62238c4, + 0xda8ab89699fbf373, + 0x3db5632fea34bc9e, + // 2^200 * 5 * B + 
0xf46eee2bf75dd9d8, + 0x0d17b1f6396759a5, + 0x1bf2d131499e7273, + 0x04321adf49d75f13, + 0x2e4990b1829825d5, + 0xedeaeb873e9a8991, + 0xeef03d394c704af8, + 0x59197ea495df2b0e, + 0x04e16019e4e55aae, + 0xe77b437a7e2f92e9, + 0xc7ce2dc16f159aa4, + 0x45eafdc1f4d70cc0, + // 2^200 * 6 * B + 0x698401858045d72b, + 0x4c22faa2cf2f0651, + 0x941a36656b222dc6, + 0x5a5eebc80362dade, + 0xb60e4624cfccb1ed, + 0x59dbc292bd5c0395, + 0x31a09d1ddc0481c9, + 0x3f73ceea5d56d940, + 0xb7a7bfd10a4e8dc6, + 0xbe57007e44c9b339, + 0x60c1207f1557aefa, + 0x26058891266218db, + // 2^200 * 7 * B + 0x59f704a68360ff04, + 0xc3d93fde7661e6f4, + 0x831b2a7312873551, + 0x54ad0c2e4e615d57, + 0x4c818e3cc676e542, + 0x5e422c9303ceccad, + 0xec07cccab4129f08, + 0x0dedfa10b24443b8, + 0xee3b67d5b82b522a, + 0x36f163469fa5c1eb, + 0xa5b4d2f26ec19fd3, + 0x62ecb2baa77a9408, + // 2^200 * 8 * B + 0xe5ed795261152b3d, + 0x4962357d0eddd7d1, + 0x7482c8d0b96b4c71, + 0x2e59f919a966d8be, + 0x92072836afb62874, + 0x5fcd5e8579e104a5, + 0x5aad01adc630a14a, + 0x61913d5075663f98, + 0x0dc62d361a3231da, + 0xfa47583294200270, + 0x02d801513f9594ce, + 0x3ddbc2a131c05d5c, + // 2^204 * 1 * B + 0x3f50a50a4ffb81ef, + 0xb1e035093bf420bf, + 0x9baa8e1cc6aa2cd0, + 0x32239861fa237a40, + 0xfb735ac2004a35d1, + 0x31de0f433a6607c3, + 0x7b8591bfc528d599, + 0x55be9a25f5bb050c, + 0x0d005acd33db3dbf, + 0x0111b37c80ac35e2, + 0x4892d66c6f88ebeb, + 0x770eadb16508fbcd, + // 2^204 * 2 * B + 0x8451f9e05e4e89dd, + 0xc06302ffbc793937, + 0x5d22749556a6495c, + 0x09a6755ca05603fb, + 0xf1d3b681a05071b9, + 0x2207659a3592ff3a, + 0x5f0169297881e40e, + 0x16bedd0e86ba374e, + 0x5ecccc4f2c2737b5, + 0x43b79e0c2dccb703, + 0x33e008bc4ec43df3, + 0x06c1b840f07566c0, + // 2^204 * 3 * B + 0x7688a5c6a388f877, + 0x02a96c14deb2b6ac, + 0x64c9f3431b8c2af8, + 0x3628435554a1eed6, + 0x69ee9e7f9b02805c, + 0xcbff828a547d1640, + 0x3d93a869b2430968, + 0x46b7b8cd3fe26972, + 0xe9812086fe7eebe0, + 0x4cba6be72f515437, + 0x1d04168b516efae9, + 0x5ea1391043982cb9, + // 2^204 * 4 * B + 
0x49125c9cf4702ee1, + 0x4520b71f8b25b32d, + 0x33193026501fef7e, + 0x656d8997c8d2eb2b, + 0x6f2b3be4d5d3b002, + 0xafec33d96a09c880, + 0x035f73a4a8bcc4cc, + 0x22c5b9284662198b, + 0xcb58c8fe433d8939, + 0x89a0cb2e6a8d7e50, + 0x79ca955309fbbe5a, + 0x0c626616cd7fc106, + // 2^204 * 5 * B + 0x1ffeb80a4879b61f, + 0x6396726e4ada21ed, + 0x33c7b093368025ba, + 0x471aa0c6f3c31788, + 0x8fdfc379fbf454b1, + 0x45a5a970f1a4b771, + 0xac921ef7bad35915, + 0x42d088dca81c2192, + 0x8fda0f37a0165199, + 0x0adadb77c8a0e343, + 0x20fbfdfcc875e820, + 0x1cf2bea80c2206e7, + // 2^204 * 6 * B + 0xc2ddf1deb36202ac, + 0x92a5fe09d2e27aa5, + 0x7d1648f6fc09f1d3, + 0x74c2cc0513bc4959, + 0x982d6e1a02c0412f, + 0x90fa4c83db58e8fe, + 0x01c2f5bcdcb18bc0, + 0x686e0c90216abc66, + 0x1fadbadba54395a7, + 0xb41a02a0ae0da66a, + 0xbf19f598bba37c07, + 0x6a12b8acde48430d, + // 2^204 * 7 * B + 0xf8daea1f39d495d9, + 0x592c190e525f1dfc, + 0xdb8cbd04c9991d1b, + 0x11f7fda3d88f0cb7, + 0x793bdd801aaeeb5f, + 0x00a2a0aac1518871, + 0xe8a373a31f2136b4, + 0x48aab888fc91ef19, + 0x041f7e925830f40e, + 0x002d6ca979661c06, + 0x86dc9ff92b046a2e, + 0x760360928b0493d1, + // 2^204 * 8 * B + 0x21bb41c6120cf9c6, + 0xeab2aa12decda59b, + 0xc1a72d020aa48b34, + 0x215d4d27e87d3b68, + 0xb43108e5695a0b05, + 0x6cb00ee8ad37a38b, + 0x5edad6eea3537381, + 0x3f2602d4b6dc3224, + 0xc8b247b65bcaf19c, + 0x49779dc3b1b2c652, + 0x89a180bbd5ece2e2, + 0x13f098a3cec8e039, + // 2^208 * 1 * B + 0x9adc0ff9ce5ec54b, + 0x039c2a6b8c2f130d, + 0x028007c7f0f89515, + 0x78968314ac04b36b, + 0xf3aa57a22796bb14, + 0x883abab79b07da21, + 0xe54be21831a0391c, + 0x5ee7fb38d83205f9, + 0x538dfdcb41446a8e, + 0xa5acfda9434937f9, + 0x46af908d263c8c78, + 0x61d0633c9bca0d09, + // 2^208 * 2 * B + 0x63744935ffdb2566, + 0xc5bd6b89780b68bb, + 0x6f1b3280553eec03, + 0x6e965fd847aed7f5, + 0xada328bcf8fc73df, + 0xee84695da6f037fc, + 0x637fb4db38c2a909, + 0x5b23ac2df8067bdc, + 0x9ad2b953ee80527b, + 0xe88f19aafade6d8d, + 0x0e711704150e82cf, + 0x79b9bbb9dd95dedc, + // 2^208 * 3 * B + 
0xebb355406a3126c2, + 0xd26383a868c8c393, + 0x6c0c6429e5b97a82, + 0x5065f158c9fd2147, + 0xd1997dae8e9f7374, + 0xa032a2f8cfbb0816, + 0xcd6cba126d445f0a, + 0x1ba811460accb834, + 0x708169fb0c429954, + 0xe14600acd76ecf67, + 0x2eaab98a70e645ba, + 0x3981f39e58a4faf2, + // 2^208 * 4 * B + 0x18fb8a7559230a93, + 0x1d168f6960e6f45d, + 0x3a85a94514a93cb5, + 0x38dc083705acd0fd, + 0xc845dfa56de66fde, + 0xe152a5002c40483a, + 0xe9d2e163c7b4f632, + 0x30f4452edcbc1b65, + 0x856d2782c5759740, + 0xfa134569f99cbecc, + 0x8844fc73c0ea4e71, + 0x632d9a1a593f2469, + // 2^208 * 5 * B + 0xf6bb6b15b807cba6, + 0x1823c7dfbc54f0d7, + 0xbb1d97036e29670b, + 0x0b24f48847ed4a57, + 0xbf09fd11ed0c84a7, + 0x63f071810d9f693a, + 0x21908c2d57cf8779, + 0x3a5a7df28af64ba2, + 0xdcdad4be511beac7, + 0xa4538075ed26ccf2, + 0xe19cff9f005f9a65, + 0x34fcf74475481f63, + // 2^208 * 6 * B + 0xc197e04c789767ca, + 0xb8714dcb38d9467d, + 0x55de888283f95fa8, + 0x3d3bdc164dfa63f7, + 0xa5bb1dab78cfaa98, + 0x5ceda267190b72f2, + 0x9309c9110a92608e, + 0x0119a3042fb374b0, + 0x67a2d89ce8c2177d, + 0x669da5f66895d0c1, + 0xf56598e5b282a2b0, + 0x56c088f1ede20a73, + // 2^208 * 7 * B + 0x336d3d1110a86e17, + 0xd7f388320b75b2fa, + 0xf915337625072988, + 0x09674c6b99108b87, + 0x581b5fac24f38f02, + 0xa90be9febae30cbd, + 0x9a2169028acf92f0, + 0x038b7ea48359038f, + 0x9f4ef82199316ff8, + 0x2f49d282eaa78d4f, + 0x0971a5ab5aef3174, + 0x6e5e31025969eb65, + // 2^208 * 8 * B + 0xb16c62f587e593fb, + 0x4999eddeca5d3e71, + 0xb491c1e014cc3e6d, + 0x08f5114789a8dba8, + 0x3304fb0e63066222, + 0xfb35068987acba3f, + 0xbd1924778c1061a3, + 0x3058ad43d1838620, + 0x323c0ffde57663d0, + 0x05c3df38a22ea610, + 0xbdc78abdac994f9a, + 0x26549fa4efe3dc99, + // 2^212 * 1 * B + 0x738b38d787ce8f89, + 0xb62658e24179a88d, + 0x30738c9cf151316d, + 0x49128c7f727275c9, + 0x04dbbc17f75396b9, + 0x69e6a2d7d2f86746, + 0xc6409d99f53eabc6, + 0x606175f6332e25d2, + 0x4021370ef540e7dd, + 0x0910d6f5a1f1d0a5, + 0x4634aacd5b06b807, + 0x6a39e6356944f235, + // 2^212 * 2 * B + 
0x96cd5640df90f3e7, + 0x6c3a760edbfa25ea, + 0x24f3ef0959e33cc4, + 0x42889e7e530d2e58, + 0x1da1965774049e9d, + 0xfbcd6ea198fe352b, + 0xb1cbcd50cc5236a6, + 0x1f5ec83d3f9846e2, + 0x8efb23c3328ccb75, + 0xaf42a207dd876ee9, + 0x20fbdadc5dfae796, + 0x241e246b06bf9f51, + // 2^212 * 3 * B + 0x29e68e57ad6e98f6, + 0x4c9260c80b462065, + 0x3f00862ea51ebb4b, + 0x5bc2c77fb38d9097, + 0x7eaafc9a6280bbb8, + 0x22a70f12f403d809, + 0x31ce40bb1bfc8d20, + 0x2bc65635e8bd53ee, + 0xe8d5dc9fa96bad93, + 0xe58fb17dde1947dc, + 0x681532ea65185fa3, + 0x1fdd6c3b034a7830, + // 2^212 * 4 * B + 0x0a64e28c55dc18fe, + 0xe3df9e993399ebdd, + 0x79ac432370e2e652, + 0x35ff7fc33ae4cc0e, + 0x9c13a6a52dd8f7a9, + 0x2dbb1f8c3efdcabf, + 0x961e32405e08f7b5, + 0x48c8a121bbe6c9e5, + 0xfc415a7c59646445, + 0xd224b2d7c128b615, + 0x6035c9c905fbb912, + 0x42d7a91274429fab, + // 2^212 * 5 * B + 0x4e6213e3eaf72ed3, + 0x6794981a43acd4e7, + 0xff547cde6eb508cb, + 0x6fed19dd10fcb532, + 0xa9a48947933da5bc, + 0x4a58920ec2e979ec, + 0x96d8800013e5ac4c, + 0x453692d74b48b147, + 0xdd775d99a8559c6f, + 0xf42a2140df003e24, + 0x5223e229da928a66, + 0x063f46ba6d38f22c, + // 2^212 * 6 * B + 0xd2d242895f536694, + 0xca33a2c542939b2c, + 0x986fada6c7ddb95c, + 0x5a152c042f712d5d, + 0x39843cb737346921, + 0xa747fb0738c89447, + 0xcb8d8031a245307e, + 0x67810f8e6d82f068, + 0x3eeb8fbcd2287db4, + 0x72c7d3a301a03e93, + 0x5473e88cbd98265a, + 0x7324aa515921b403, + // 2^212 * 7 * B + 0x857942f46c3cbe8e, + 0xa1d364b14730c046, + 0x1c8ed914d23c41bf, + 0x0838e161eef6d5d2, + 0xad23f6dae82354cb, + 0x6962502ab6571a6d, + 0x9b651636e38e37d1, + 0x5cac5005d1a3312f, + 0x8cc154cce9e39904, + 0x5b3a040b84de6846, + 0xc4d8a61cb1be5d6e, + 0x40fb897bd8861f02, + // 2^212 * 8 * B + 0x84c5aa9062de37a1, + 0x421da5000d1d96e1, + 0x788286306a9242d9, + 0x3c5e464a690d10da, + 0xe57ed8475ab10761, + 0x71435e206fd13746, + 0x342f824ecd025632, + 0x4b16281ea8791e7b, + 0xd1c101d50b813381, + 0xdee60f1176ee6828, + 0x0cb68893383f6409, + 0x6183c565f6ff484a, + // 2^216 * 1 * B + 
0x741d5a461e6bf9d6, + 0x2305b3fc7777a581, + 0xd45574a26474d3d9, + 0x1926e1dc6401e0ff, + 0xdb468549af3f666e, + 0xd77fcf04f14a0ea5, + 0x3df23ff7a4ba0c47, + 0x3a10dfe132ce3c85, + 0xe07f4e8aea17cea0, + 0x2fd515463a1fc1fd, + 0x175322fd31f2c0f1, + 0x1fa1d01d861e5d15, + // 2^216 * 2 * B + 0xcc8055947d599832, + 0x1e4656da37f15520, + 0x99f6f7744e059320, + 0x773563bc6a75cf33, + 0x38dcac00d1df94ab, + 0x2e712bddd1080de9, + 0x7f13e93efdd5e262, + 0x73fced18ee9a01e5, + 0x06b1e90863139cb3, + 0xa493da67c5a03ecd, + 0x8d77cec8ad638932, + 0x1f426b701b864f44, + // 2^216 * 3 * B + 0xefc9264c41911c01, + 0xf1a3b7b817a22c25, + 0x5875da6bf30f1447, + 0x4e1af5271d31b090, + 0xf17e35c891a12552, + 0xb76b8153575e9c76, + 0xfa83406f0d9b723e, + 0x0b76bb1b3fa7e438, + 0x08b8c1f97f92939b, + 0xbe6771cbd444ab6e, + 0x22e5646399bb8017, + 0x7b6dd61eb772a955, + // 2^216 * 4 * B + 0xb7adc1e850f33d92, + 0x7998fa4f608cd5cf, + 0xad962dbd8dfc5bdb, + 0x703e9bceaf1d2f4f, + 0x5730abf9ab01d2c7, + 0x16fb76dc40143b18, + 0x866cbe65a0cbb281, + 0x53fa9b659bff6afe, + 0x6c14c8e994885455, + 0x843a5d6665aed4e5, + 0x181bb73ebcd65af1, + 0x398d93e5c4c61f50, + // 2^216 * 5 * B + 0x1c4bd16733e248f3, + 0xbd9e128715bf0a5f, + 0xd43f8cf0a10b0376, + 0x53b09b5ddf191b13, + 0xc3877c60d2e7e3f2, + 0x3b34aaa030828bb1, + 0x283e26e7739ef138, + 0x699c9c9002c30577, + 0xf306a7235946f1cc, + 0x921718b5cce5d97d, + 0x28cdd24781b4e975, + 0x51caf30c6fcdd907, + // 2^216 * 6 * B + 0xa60ba7427674e00a, + 0x630e8570a17a7bf3, + 0x3758563dcf3324cc, + 0x5504aa292383fdaa, + 0x737af99a18ac54c7, + 0x903378dcc51cb30f, + 0x2b89bc334ce10cc7, + 0x12ae29c189f8e99a, + 0xa99ec0cb1f0d01cf, + 0x0dd1efcc3a34f7ae, + 0x55ca7521d09c4e22, + 0x5fd14fe958eba5ea, + // 2^216 * 7 * B + 0xb5dc2ddf2845ab2c, + 0x069491b10a7fe993, + 0x4daaf3d64002e346, + 0x093ff26e586474d1, + 0x3c42fe5ebf93cb8e, + 0xbedfa85136d4565f, + 0xe0f0859e884220e8, + 0x7dd73f960725d128, + 0xb10d24fe68059829, + 0x75730672dbaf23e5, + 0x1367253ab457ac29, + 0x2f59bcbc86b470a4, + // 2^216 * 8 * B + 
0x83847d429917135f, + 0xad1b911f567d03d7, + 0x7e7748d9be77aad1, + 0x5458b42e2e51af4a, + 0x7041d560b691c301, + 0x85201b3fadd7e71e, + 0x16c2e16311335585, + 0x2aa55e3d010828b1, + 0xed5192e60c07444f, + 0x42c54e2d74421d10, + 0x352b4c82fdb5c864, + 0x13e9004a8a768664, + // 2^220 * 1 * B + 0xcbb5b5556c032bff, + 0xdf7191b729297a3a, + 0xc1ff7326aded81bb, + 0x71ade8bb68be03f5, + 0x1e6284c5806b467c, + 0xc5f6997be75d607b, + 0x8b67d958b378d262, + 0x3d88d66a81cd8b70, + 0x8b767a93204ed789, + 0x762fcacb9fa0ae2a, + 0x771febcc6dce4887, + 0x343062158ff05fb3, + // 2^220 * 2 * B + 0xe05da1a7e1f5bf49, + 0x26457d6dd4736092, + 0x77dcb07773cc32f6, + 0x0a5d94969cdd5fcd, + 0xfce219072a7b31b4, + 0x4d7adc75aa578016, + 0x0ec276a687479324, + 0x6d6d9d5d1fda4beb, + 0x22b1a58ae9b08183, + 0xfd95d071c15c388b, + 0xa9812376850a0517, + 0x33384cbabb7f335e, + // 2^220 * 3 * B + 0x3c6fa2680ca2c7b5, + 0x1b5082046fb64fda, + 0xeb53349c5431d6de, + 0x5278b38f6b879c89, + 0x33bc627a26218b8d, + 0xea80b21fc7a80c61, + 0x9458b12b173e9ee6, + 0x076247be0e2f3059, + 0x52e105f61416375a, + 0xec97af3685abeba4, + 0x26e6b50623a67c36, + 0x5cf0e856f3d4fb01, + // 2^220 * 4 * B + 0xf6c968731ae8cab4, + 0x5e20741ecb4f92c5, + 0x2da53be58ccdbc3e, + 0x2dddfea269970df7, + 0xbeaece313db342a8, + 0xcba3635b842db7ee, + 0xe88c6620817f13ef, + 0x1b9438aa4e76d5c6, + 0x8a50777e166f031a, + 0x067b39f10fb7a328, + 0x1925c9a6010fbd76, + 0x6df9b575cc740905, + // 2^220 * 5 * B + 0x42c1192927f6bdcf, + 0x8f91917a403d61ca, + 0xdc1c5a668b9e1f61, + 0x1596047804ec0f8d, + 0xecdfc35b48cade41, + 0x6a88471fb2328270, + 0x740a4a2440a01b6a, + 0x471e5796003b5f29, + 0xda96bbb3aced37ac, + 0x7a2423b5e9208cea, + 0x24cc5c3038aebae2, + 0x50c356afdc5dae2f, + // 2^220 * 6 * B + 0x09dcbf4341c30318, + 0xeeba061183181dce, + 0xc179c0cedc1e29a1, + 0x1dbf7b89073f35b0, + 0xcfed9cdf1b31b964, + 0xf486a9858ca51af3, + 0x14897265ea8c1f84, + 0x784a53dd932acc00, + 0x2d99f9df14fc4920, + 0x76ccb60cc4499fe5, + 0xa4132cbbe5cf0003, + 0x3f93d82354f000ea, + // 2^220 * 7 * B + 
0x8183e7689e04ce85, + 0x678fb71e04465341, + 0xad92058f6688edac, + 0x5da350d3532b099a, + 0xeaac12d179e14978, + 0xff923ff3bbebff5e, + 0x4af663e40663ce27, + 0x0fd381a811a5f5ff, + 0xf256aceca436df54, + 0x108b6168ae69d6e8, + 0x20d986cb6b5d036c, + 0x655957b9fee2af50, + // 2^220 * 8 * B + 0xaea8b07fa902030f, + 0xf88c766af463d143, + 0x15b083663c787a60, + 0x08eab1148267a4a8, + 0xbdc1409bd002d0ac, + 0x66660245b5ccd9a6, + 0x82317dc4fade85ec, + 0x02fe934b6ad7df0d, + 0xef5cf100cfb7ea74, + 0x22897633a1cb42ac, + 0xd4ce0c54cef285e2, + 0x30408c048a146a55, + // 2^224 * 1 * B + 0x739d8845832fcedb, + 0xfa38d6c9ae6bf863, + 0x32bc0dcab74ffef7, + 0x73937e8814bce45e, + 0xbb2e00c9193b877f, + 0xece3a890e0dc506b, + 0xecf3b7c036de649f, + 0x5f46040898de9e1a, + 0xb9037116297bf48d, + 0xa9d13b22d4f06834, + 0xe19715574696bdc6, + 0x2cf8a4e891d5e835, + // 2^224 * 2 * B + 0x6d93fd8707110f67, + 0xdd4c09d37c38b549, + 0x7cb16a4cc2736a86, + 0x2049bd6e58252a09, + 0x2cb5487e17d06ba2, + 0x24d2381c3950196b, + 0xd7659c8185978a30, + 0x7a6f7f2891d6a4f6, + 0x7d09fd8d6a9aef49, + 0xf0ee60be5b3db90b, + 0x4c21b52c519ebfd4, + 0x6011aadfc545941d, + // 2^224 * 3 * B + 0x5f67926dcf95f83c, + 0x7c7e856171289071, + 0xd6a1e7f3998f7a5b, + 0x6fc5cc1b0b62f9e0, + 0x63ded0c802cbf890, + 0xfbd098ca0dff6aaa, + 0x624d0afdb9b6ed99, + 0x69ce18b779340b1e, + 0xd1ef5528b29879cb, + 0xdd1aae3cd47e9092, + 0x127e0442189f2352, + 0x15596b3ae57101f1, + // 2^224 * 4 * B + 0x462739d23f9179a2, + 0xff83123197d6ddcf, + 0x1307deb553f2148a, + 0x0d2237687b5f4dda, + 0x09ff31167e5124ca, + 0x0be4158bd9c745df, + 0x292b7d227ef556e5, + 0x3aa4e241afb6d138, + 0x2cc138bf2a3305f5, + 0x48583f8fa2e926c3, + 0x083ab1a25549d2eb, + 0x32fcaa6e4687a36c, + // 2^224 * 5 * B + 0x7bc56e8dc57d9af5, + 0x3e0bd2ed9df0bdf2, + 0xaac014de22efe4a3, + 0x4627e9cefebd6a5c, + 0x3207a4732787ccdf, + 0x17e31908f213e3f8, + 0xd5b2ecd7f60d964e, + 0x746f6336c2600be9, + 0x3f4af345ab6c971c, + 0xe288eb729943731f, + 0x33596a8a0344186d, + 0x7b4917007ed66293, + // 2^224 * 6 * B + 
0x2d85fb5cab84b064, + 0x497810d289f3bc14, + 0x476adc447b15ce0c, + 0x122ba376f844fd7b, + 0x54341b28dd53a2dd, + 0xaa17905bdf42fc3f, + 0x0ff592d94dd2f8f4, + 0x1d03620fe08cd37d, + 0xc20232cda2b4e554, + 0x9ed0fd42115d187f, + 0x2eabb4be7dd479d9, + 0x02c70bf52b68ec4c, + // 2^224 * 7 * B + 0xa287ec4b5d0b2fbb, + 0x415c5790074882ca, + 0xe044a61ec1d0815c, + 0x26334f0a409ef5e0, + 0xace532bf458d72e1, + 0x5be768e07cb73cb5, + 0x56cf7d94ee8bbde7, + 0x6b0697e3feb43a03, + 0xb6c8f04adf62a3c0, + 0x3ef000ef076da45d, + 0x9c9cb95849f0d2a9, + 0x1cc37f43441b2fae, + // 2^224 * 8 * B + 0x508f565a5cc7324f, + 0xd061c4c0e506a922, + 0xfb18abdb5c45ac19, + 0x6c6809c10380314a, + 0xd76656f1c9ceaeb9, + 0x1c5b15f818e5656a, + 0x26e72832844c2334, + 0x3a346f772f196838, + 0xd2d55112e2da6ac8, + 0xe9bd0331b1e851ed, + 0x960746dd8ec67262, + 0x05911b9f6ef7c5d0, + // 2^228 * 1 * B + 0xe9dcd756b637ff2d, + 0xec4c348fc987f0c4, + 0xced59285f3fbc7b7, + 0x3305354793e1ea87, + 0x01c18980c5fe9f94, + 0xcd656769716fd5c8, + 0x816045c3d195a086, + 0x6e2b7f3266cc7982, + 0xcc802468f7c3568f, + 0x9de9ba8219974cb3, + 0xabb7229cb5b81360, + 0x44e2017a6fbeba62, + // 2^228 * 2 * B + 0xc4c2a74354dab774, + 0x8e5d4c3c4eaf031a, + 0xb76c23d242838f17, + 0x749a098f68dce4ea, + 0x87f82cf3b6ca6ecd, + 0x580f893e18f4a0c2, + 0x058930072604e557, + 0x6cab6ac256d19c1d, + 0xdcdfe0a02cc1de60, + 0x032665ff51c5575b, + 0x2c0c32f1073abeeb, + 0x6a882014cd7b8606, + // 2^228 * 3 * B + 0xa52a92fea4747fb5, + 0xdc12a4491fa5ab89, + 0xd82da94bb847a4ce, + 0x4d77edce9512cc4e, + 0xd111d17caf4feb6e, + 0x050bba42b33aa4a3, + 0x17514c3ceeb46c30, + 0x54bedb8b1bc27d75, + 0x77c8e14577e2189c, + 0xa3e46f6aff99c445, + 0x3144dfc86d335343, + 0x3a96559e7c4216a9, + // 2^228 * 4 * B + 0x12550d37f42ad2ee, + 0x8b78e00498a1fbf5, + 0x5d53078233894cb2, + 0x02c84e4e3e498d0c, + 0x4493896880baaa52, + 0x4c98afc4f285940e, + 0xef4aa79ba45448b6, + 0x5278c510a57aae7f, + 0xa54dd074294c0b94, + 0xf55d46b8df18ffb6, + 0xf06fecc58dae8366, + 0x588657668190d165, + // 2^228 * 5 * B + 
0xd47712311aef7117, + 0x50343101229e92c7, + 0x7a95e1849d159b97, + 0x2449959b8b5d29c9, + 0xbf5834f03de25cc3, + 0xb887c8aed6815496, + 0x5105221a9481e892, + 0x6760ed19f7723f93, + 0x669ba3b7ac35e160, + 0x2eccf73fba842056, + 0x1aec1f17c0804f07, + 0x0d96bc031856f4e7, + // 2^228 * 6 * B + 0x3318be7775c52d82, + 0x4cb764b554d0aab9, + 0xabcf3d27cc773d91, + 0x3bf4d1848123288a, + 0xb1d534b0cc7505e1, + 0x32cd003416c35288, + 0xcb36a5800762c29d, + 0x5bfe69b9237a0bf8, + 0x183eab7e78a151ab, + 0xbbe990c999093763, + 0xff717d6e4ac7e335, + 0x4c5cddb325f39f88, + // 2^228 * 7 * B + 0xc0f6b74d6190a6eb, + 0x20ea81a42db8f4e4, + 0xa8bd6f7d97315760, + 0x33b1d60262ac7c21, + 0x57750967e7a9f902, + 0x2c37fdfc4f5b467e, + 0xb261663a3177ba46, + 0x3a375e78dc2d532b, + 0x8141e72f2d4dddea, + 0xe6eafe9862c607c8, + 0x23c28458573cafd0, + 0x46b9476f4ff97346, + // 2^228 * 8 * B + 0x0c1ffea44f901e5c, + 0x2b0b6fb72184b782, + 0xe587ff910114db88, + 0x37130f364785a142, + 0x1215505c0d58359f, + 0x2a2013c7fc28c46b, + 0x24a0a1af89ea664e, + 0x4400b638a1130e1f, + 0x3a01b76496ed19c3, + 0x31e00ab0ed327230, + 0x520a885783ca15b1, + 0x06aab9875accbec7, + // 2^232 * 1 * B + 0xc1339983f5df0ebb, + 0xc0f3758f512c4cac, + 0x2cf1130a0bb398e1, + 0x6b3cecf9aa270c62, + 0x5349acf3512eeaef, + 0x20c141d31cc1cb49, + 0x24180c07a99a688d, + 0x555ef9d1c64b2d17, + 0x36a770ba3b73bd08, + 0x624aef08a3afbf0c, + 0x5737ff98b40946f2, + 0x675f4de13381749d, + // 2^232 * 2 * B + 0x0e2c52036b1782fc, + 0x64816c816cad83b4, + 0xd0dcbdd96964073e, + 0x13d99df70164c520, + 0xa12ff6d93bdab31d, + 0x0725d80f9d652dfe, + 0x019c4ff39abe9487, + 0x60f450b882cd3c43, + 0x014b5ec321e5c0ca, + 0x4fcb69c9d719bfa2, + 0x4e5f1c18750023a0, + 0x1c06de9e55edac80, + // 2^232 * 3 * B + 0x990f7ad6a33ec4e2, + 0x6608f938be2ee08e, + 0x9ca143c563284515, + 0x4cf38a1fec2db60d, + 0xffd52b40ff6d69aa, + 0x34530b18dc4049bb, + 0x5e4a5c2fa34d9897, + 0x78096f8e7d32ba2d, + 0xa0aaaa650dfa5ce7, + 0xf9c49e2a48b5478c, + 0x4f09cc7d7003725b, + 0x373cad3a26091abe, + // 2^232 * 4 * B + 
0xb294634d82c9f57c, + 0x1fcbfde124934536, + 0x9e9c4db3418cdb5a, + 0x0040f3d9454419fc, + 0xf1bea8fb89ddbbad, + 0x3bcb2cbc61aeaecb, + 0x8f58a7bb1f9b8d9d, + 0x21547eda5112a686, + 0xdefde939fd5986d3, + 0xf4272c89510a380c, + 0xb72ba407bb3119b9, + 0x63550a334a254df4, + // 2^232 * 5 * B + 0x6507d6edb569cf37, + 0x178429b00ca52ee1, + 0xea7c0090eb6bd65d, + 0x3eea62c7daf78f51, + 0x9bba584572547b49, + 0xf305c6fae2c408e0, + 0x60e8fa69c734f18d, + 0x39a92bafaa7d767a, + 0x9d24c713e693274e, + 0x5f63857768dbd375, + 0x70525560eb8ab39a, + 0x68436a0665c9c4cd, + // 2^232 * 6 * B + 0xbc0235e8202f3f27, + 0xc75c00e264f975b0, + 0x91a4e9d5a38c2416, + 0x17b6e7f68ab789f9, + 0x1e56d317e820107c, + 0xc5266844840ae965, + 0xc1e0a1c6320ffc7a, + 0x5373669c91611472, + 0x5d2814ab9a0e5257, + 0x908f2084c9cab3fc, + 0xafcaf5885b2d1eca, + 0x1cb4b5a678f87d11, + // 2^232 * 7 * B + 0xb664c06b394afc6c, + 0x0c88de2498da5fb1, + 0x4f8d03164bcad834, + 0x330bca78de7434a2, + 0x6b74aa62a2a007e7, + 0xf311e0b0f071c7b1, + 0x5707e438000be223, + 0x2dc0fd2d82ef6eac, + 0x982eff841119744e, + 0xf9695e962b074724, + 0xc58ac14fbfc953fb, + 0x3c31be1b369f1cf5, + // 2^232 * 8 * B + 0xb0f4864d08948aee, + 0x07dc19ee91ba1c6f, + 0x7975cdaea6aca158, + 0x330b61134262d4bb, + 0xc168bc93f9cb4272, + 0xaeb8711fc7cedb98, + 0x7f0e52aa34ac8d7a, + 0x41cec1097e7d55bb, + 0xf79619d7a26d808a, + 0xbb1fd49e1d9e156d, + 0x73d7c36cdba1df27, + 0x26b44cd91f28777d, + // 2^236 * 1 * B + 0x300a9035393aa6d8, + 0x2b501131a12bb1cd, + 0x7b1ff677f093c222, + 0x4309c1f8cab82bad, + 0xaf44842db0285f37, + 0x8753189047efc8df, + 0x9574e091f820979a, + 0x0e378d6069615579, + 0xd9fa917183075a55, + 0x4bdb5ad26b009fdc, + 0x7829ad2cd63def0e, + 0x078fc54975fd3877, + // 2^236 * 2 * B + 0x87dfbd1428878f2d, + 0x134636dd1e9421a1, + 0x4f17c951257341a3, + 0x5df98d4bad296cb8, + 0xe2004b5bb833a98a, + 0x44775dec2d4c3330, + 0x3aa244067eace913, + 0x272630e3d58e00a9, + 0xf3678fd0ecc90b54, + 0xf001459b12043599, + 0x26725fbc3758b89b, + 0x4325e4aa73a719ae, + // 2^236 * 3 * B + 
0x657dc6ef433c3493, + 0x65375e9f80dbf8c3, + 0x47fd2d465b372dae, + 0x4966ab79796e7947, + 0xed24629acf69f59d, + 0x2a4a1ccedd5abbf4, + 0x3535ca1f56b2d67b, + 0x5d8c68d043b1b42d, + 0xee332d4de3b42b0a, + 0xd84e5a2b16a4601c, + 0x78243877078ba3e4, + 0x77ed1eb4184ee437, + // 2^236 * 4 * B + 0xbfd4e13f201839a0, + 0xaeefffe23e3df161, + 0xb65b04f06b5d1fe3, + 0x52e085fb2b62fbc0, + 0x185d43f89e92ed1a, + 0xb04a1eeafe4719c6, + 0x499fbe88a6f03f4f, + 0x5d8b0d2f3c859bdd, + 0x124079eaa54cf2ba, + 0xd72465eb001b26e7, + 0x6843bcfdc97af7fd, + 0x0524b42b55eacd02, + // 2^236 * 5 * B + 0xfd0d5dbee45447b0, + 0x6cec351a092005ee, + 0x99a47844567579cb, + 0x59d242a216e7fa45, + 0xbc18dcad9b829eac, + 0x23ae7d28b5f579d0, + 0xc346122a69384233, + 0x1a6110b2e7d4ac89, + 0x4f833f6ae66997ac, + 0x6849762a361839a4, + 0x6985dec1970ab525, + 0x53045e89dcb1f546, + // 2^236 * 6 * B + 0xcb8bb346d75353db, + 0xfcfcb24bae511e22, + 0xcba48d40d50ae6ef, + 0x26e3bae5f4f7cb5d, + 0x84da3cde8d45fe12, + 0xbd42c218e444e2d2, + 0xa85196781f7e3598, + 0x7642c93f5616e2b2, + 0x2323daa74595f8e4, + 0xde688c8b857abeb4, + 0x3fc48e961c59326e, + 0x0b2e73ca15c9b8ba, + // 2^236 * 7 * B + 0xd6bb4428c17f5026, + 0x9eb27223fb5a9ca7, + 0xe37ba5031919c644, + 0x21ce380db59a6602, + 0x0e3fbfaf79c03a55, + 0x3077af054cbb5acf, + 0xd5c55245db3de39f, + 0x015e68c1476a4af7, + 0xc1d5285220066a38, + 0x95603e523570aef3, + 0x832659a7226b8a4d, + 0x5dd689091f8eedc9, + // 2^236 * 8 * B + 0xcbac84debfd3c856, + 0x1624c348b35ff244, + 0xb7f88dca5d9cad07, + 0x3b0e574da2c2ebe8, + 0x1d022591a5313084, + 0xca2d4aaed6270872, + 0x86a12b852f0bfd20, + 0x56e6c439ad7da748, + 0xc704ff4942bdbae6, + 0x5e21ade2b2de1f79, + 0xe95db3f35652fad8, + 0x0822b5378f08ebc1, + // 2^240 * 1 * B + 0x51f048478f387475, + 0xb25dbcf49cbecb3c, + 0x9aab1244d99f2055, + 0x2c709e6c1c10a5d6, + 0xe1b7f29362730383, + 0x4b5279ffebca8a2c, + 0xdafc778abfd41314, + 0x7deb10149c72610f, + 0xcb62af6a8766ee7a, + 0x66cbec045553cd0e, + 0x588001380f0be4b5, + 0x08e68e9ff62ce2ea, + // 2^240 * 2 * B + 
0x34ad500a4bc130ad, + 0x8d38db493d0bd49c, + 0xa25c3d98500a89be, + 0x2f1f3f87eeba3b09, + 0x2f2d09d50ab8f2f9, + 0xacb9218dc55923df, + 0x4a8f342673766cb9, + 0x4cb13bd738f719f5, + 0xf7848c75e515b64a, + 0xa59501badb4a9038, + 0xc20d313f3f751b50, + 0x19a1e353c0ae2ee8, + // 2^240 * 3 * B + 0x7d1c7560bafa05c3, + 0xb3e1a0a0c6e55e61, + 0xe3529718c0d66473, + 0x41546b11c20c3486, + 0xb42172cdd596bdbd, + 0x93e0454398eefc40, + 0x9fb15347b44109b5, + 0x736bd3990266ae34, + 0x85532d509334b3b4, + 0x46fd114b60816573, + 0xcc5f5f30425c8375, + 0x412295a2b87fab5c, + // 2^240 * 4 * B + 0x19c99b88f57ed6e9, + 0x5393cb266df8c825, + 0x5cee3213b30ad273, + 0x14e153ebb52d2e34, + 0x2e655261e293eac6, + 0x845a92032133acdb, + 0x460975cb7900996b, + 0x0760bb8d195add80, + 0x413e1a17cde6818a, + 0x57156da9ed69a084, + 0x2cbf268f46caccb1, + 0x6b34be9bc33ac5f2, + // 2^240 * 5 * B + 0xf3df2f643a78c0b2, + 0x4c3e971ef22e027c, + 0xec7d1c5e49c1b5a3, + 0x2012c18f0922dd2d, + 0x11fc69656571f2d3, + 0xc6c9e845530e737a, + 0xe33ae7a2d4fe5035, + 0x01b9c7b62e6dd30b, + 0x880b55e55ac89d29, + 0x1483241f45a0a763, + 0x3d36efdfc2e76c1f, + 0x08af5b784e4bade8, + // 2^240 * 6 * B + 0x283499dc881f2533, + 0x9d0525da779323b6, + 0x897addfb673441f4, + 0x32b79d71163a168d, + 0xe27314d289cc2c4b, + 0x4be4bd11a287178d, + 0x18d528d6fa3364ce, + 0x6423c1d5afd9826e, + 0xcc85f8d9edfcb36a, + 0x22bcc28f3746e5f9, + 0xe49de338f9e5d3cd, + 0x480a5efbc13e2dcc, + // 2^240 * 7 * B + 0x0b51e70b01622071, + 0x06b505cf8b1dafc5, + 0x2c6bb061ef5aabcd, + 0x47aa27600cb7bf31, + 0xb6614ce442ce221f, + 0x6e199dcc4c053928, + 0x663fb4a4dc1cbe03, + 0x24b31d47691c8e06, + 0x2a541eedc015f8c3, + 0x11a4fe7e7c693f7c, + 0xf0af66134ea278d6, + 0x545b585d14dda094, + // 2^240 * 8 * B + 0x67bf275ea0d43a0f, + 0xade68e34089beebe, + 0x4289134cd479e72e, + 0x0f62f9c332ba5454, + 0x6204e4d0e3b321e1, + 0x3baa637a28ff1e95, + 0x0b0ccffd5b99bd9e, + 0x4d22dc3e64c8d071, + 0xfcb46589d63b5f39, + 0x5cae6a3f57cbcf61, + 0xfebac2d2953afa05, + 0x1c0fa01a36371436, + // 2^244 * 1 * B + 
0xe7547449bc7cd692, + 0x0f9abeaae6f73ddf, + 0x4af01ca700837e29, + 0x63ab1b5d3f1bc183, + 0xc11ee5e854c53fae, + 0x6a0b06c12b4f3ff4, + 0x33540f80e0b67a72, + 0x15f18fc3cd07e3ef, + 0x32750763b028f48c, + 0x06020740556a065f, + 0xd53bd812c3495b58, + 0x08706c9b865f508d, + // 2^244 * 2 * B + 0xf37ca2ab3d343dff, + 0x1a8c6a2d80abc617, + 0x8e49e035d4ccffca, + 0x48b46beebaa1d1b9, + 0xcc991b4138b41246, + 0x243b9c526f9ac26b, + 0xb9ef494db7cbabbd, + 0x5fba433dd082ed00, + 0x9c49e355c9941ad0, + 0xb9734ade74498f84, + 0x41c3fed066663e5c, + 0x0ecfedf8e8e710b3, + // 2^244 * 3 * B + 0x76430f9f9cd470d9, + 0xb62acc9ba42f6008, + 0x1898297c59adad5e, + 0x7789dd2db78c5080, + 0x744f7463e9403762, + 0xf79a8dee8dfcc9c9, + 0x163a649655e4cde3, + 0x3b61788db284f435, + 0xb22228190d6ef6b2, + 0xa94a66b246ce4bfa, + 0x46c1a77a4f0b6cc7, + 0x4236ccffeb7338cf, + // 2^244 * 4 * B + 0x8497404d0d55e274, + 0x6c6663d9c4ad2b53, + 0xec2fb0d9ada95734, + 0x2617e120cdb8f73c, + 0x3bd82dbfda777df6, + 0x71b177cc0b98369e, + 0x1d0e8463850c3699, + 0x5a71945b48e2d1f1, + 0x6f203dd5405b4b42, + 0x327ec60410b24509, + 0x9c347230ac2a8846, + 0x77de29fc11ffeb6a, + // 2^244 * 5 * B + 0xb0ac57c983b778a8, + 0x53cdcca9d7fe912c, + 0x61c2b854ff1f59dc, + 0x3a1a2cf0f0de7dac, + 0x835e138fecced2ca, + 0x8c9eaf13ea963b9a, + 0xc95fbfc0b2160ea6, + 0x575e66f3ad877892, + 0x99803a27c88fcb3a, + 0x345a6789275ec0b0, + 0x459789d0ff6c2be5, + 0x62f882651e70a8b2, + // 2^244 * 6 * B + 0x085ae2c759ff1be4, + 0x149145c93b0e40b7, + 0xc467e7fa7ff27379, + 0x4eeecf0ad5c73a95, + 0x6d822986698a19e0, + 0xdc9821e174d78a71, + 0x41a85f31f6cb1f47, + 0x352721c2bcda9c51, + 0x48329952213fc985, + 0x1087cf0d368a1746, + 0x8e5261b166c15aa5, + 0x2d5b2d842ed24c21, + // 2^244 * 7 * B + 0x02cfebd9ebd3ded1, + 0xd45b217739021974, + 0x7576f813fe30a1b7, + 0x5691b6f9a34ef6c2, + 0x5eb7d13d196ac533, + 0x377234ecdb80be2b, + 0xe144cffc7cf5ae24, + 0x5226bcf9c441acec, + 0x79ee6c7223e5b547, + 0x6f5f50768330d679, + 0xed73e1e96d8adce9, + 0x27c3da1e1d8ccc03, + // 2^244 * 8 * B + 
0x7eb9efb23fe24c74, + 0x3e50f49f1651be01, + 0x3ea732dc21858dea, + 0x17377bd75bb810f9, + 0x28302e71630ef9f6, + 0xc2d4a2032b64cee0, + 0x090820304b6292be, + 0x5fca747aa82adf18, + 0x232a03c35c258ea5, + 0x86f23a2c6bcb0cf1, + 0x3dad8d0d2e442166, + 0x04a8933cab76862b, + // 2^248 * 1 * B + 0xd2c604b622943dff, + 0xbc8cbece44cfb3a0, + 0x5d254ff397808678, + 0x0fa3614f3b1ca6bf, + 0x69082b0e8c936a50, + 0xf9c9a035c1dac5b6, + 0x6fb73e54c4dfb634, + 0x4005419b1d2bc140, + 0xa003febdb9be82f0, + 0x2089c1af3a44ac90, + 0xf8499f911954fa8e, + 0x1fba218aef40ab42, + // 2^248 * 2 * B + 0xab549448fac8f53e, + 0x81f6e89a7ba63741, + 0x74fd6c7d6c2b5e01, + 0x392e3acaa8c86e42, + 0x4f3e57043e7b0194, + 0xa81d3eee08daaf7f, + 0xc839c6ab99dcdef1, + 0x6c535d13ff7761d5, + 0x4cbd34e93e8a35af, + 0x2e0781445887e816, + 0x19319c76f29ab0ab, + 0x25e17fe4d50ac13b, + // 2^248 * 3 * B + 0x0a289bd71e04f676, + 0x208e1c52d6420f95, + 0x5186d8b034691fab, + 0x255751442a9fb351, + 0x915f7ff576f121a7, + 0xc34a32272fcd87e3, + 0xccba2fde4d1be526, + 0x6bba828f8969899b, + 0xe2d1bc6690fe3901, + 0x4cb54a18a0997ad5, + 0x971d6914af8460d4, + 0x559d504f7f6b7be4, + // 2^248 * 4 * B + 0xa7738378b3eb54d5, + 0x1d69d366a5553c7c, + 0x0a26cf62f92800ba, + 0x01ab12d5807e3217, + 0x9c4891e7f6d266fd, + 0x0744a19b0307781b, + 0x88388f1d6061e23b, + 0x123ea6a3354bd50e, + 0x118d189041e32d96, + 0xb9ede3c2d8315848, + 0x1eab4271d83245d9, + 0x4a3961e2c918a154, + // 2^248 * 5 * B + 0x71dc3be0f8e6bba0, + 0xd6cef8347effe30a, + 0xa992425fe13a476a, + 0x2cd6bce3fb1db763, + 0x0327d644f3233f1e, + 0x499a260e34fcf016, + 0x83b5a716f2dab979, + 0x68aceead9bd4111f, + 0x38b4c90ef3d7c210, + 0x308e6e24b7ad040c, + 0x3860d9f1b7e73e23, + 0x595760d5b508f597, + // 2^248 * 6 * B + 0x6129bfe104aa6397, + 0x8f960008a4a7fccb, + 0x3f8bc0897d909458, + 0x709fa43edcb291a9, + 0x882acbebfd022790, + 0x89af3305c4115760, + 0x65f492e37d3473f4, + 0x2cb2c5df54515a2b, + 0xeb0a5d8c63fd2aca, + 0xd22bc1662e694eff, + 0x2723f36ef8cbb03a, + 0x70f029ecf0c8131f, + // 2^248 * 7 * B + 
0x461307b32eed3e33, + 0xae042f33a45581e7, + 0xc94449d3195f0366, + 0x0b7d5d8a6c314858, + 0x2a6aafaa5e10b0b9, + 0x78f0a370ef041aa9, + 0x773efb77aa3ad61f, + 0x44eca5a2a74bd9e1, + 0x25d448327b95d543, + 0x70d38300a3340f1d, + 0xde1c531c60e1c52b, + 0x272224512c7de9e4, + // 2^248 * 8 * B + 0x1abc92af49c5342e, + 0xffeed811b2e6fad0, + 0xefa28c8dfcc84e29, + 0x11b5df18a44cc543, + 0xbf7bbb8a42a975fc, + 0x8c5c397796ada358, + 0xe27fc76fcdedaa48, + 0x19735fd7f6bc20a6, + 0xe3ab90d042c84266, + 0xeb848e0f7f19547e, + 0x2503a1d065a497b9, + 0x0fef911191df895f, + ]); diff --git a/graviola/src/low/aarch64/edwards25519_scalarmuldouble.rs b/graviola/src/low/aarch64/edwards25519_scalarmuldouble.rs new file mode 100644 index 000000000..c9dc3eb82 --- /dev/null +++ b/graviola/src/low/aarch64/edwards25519_scalarmuldouble.rs @@ -0,0 +1,3033 @@ +// generated source. do not edit. +#![allow(non_upper_case_globals, unused_macros, unused_imports)] +use crate::low::macros::*; + +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Double scalar multiplication for edwards25519, fresh and base point +// Input scalar[4], point[8], bscalar[4]; output res[8] +// +// extern void edwards25519_scalarmuldouble_alt +// (uint64_t res[static 8],const uint64_t scalar[static 4], +// const uint64_t point[static 8],const uint64_t bscalar[static 4]); +// +// Given scalar = n, point = P and bscalar = m, returns in res +// the point (X,Y) = n * P + m * B where B = (...,4/5) is +// the standard basepoint for the edwards25519 (Ed25519) curve. +// +// Both 256-bit coordinates of the input point P are implicitly +// reduced modulo 2^255-19 if they are not already in reduced form, +// but the conventional usage is that they *are* already reduced. 
+// The scalars can be arbitrary 256-bit numbers but may also be +// considered as implicitly reduced modulo the group order. +// +// Standard ARM ABI: X0 = res, X1 = scalar, X2 = point, X3 = bscalar +// ---------------------------------------------------------------------------- + +// Size of individual field elements + +macro_rules! NUMSIZE { + () => { + "32" + }; +} + +// Stable home for the input result argument during the whole body + +macro_rules! res { + () => { + "x25" + }; +} + +// Additional pointer variables for local subroutines + +macro_rules! p0 { + () => { + "x22" + }; +} +macro_rules! p1 { + () => { + "x23" + }; +} +macro_rules! p2 { + () => { + "x24" + }; +} + +// Other variables that are only needed prior to the modular inverse. + +macro_rules! i { + () => { + "x19" + }; +} +macro_rules! bf { + () => { + "x20" + }; +} +macro_rules! cf { + () => { + "x21" + }; +} + +// Pointer-offset pairs for result and temporaries on stack with some aliasing. + +macro_rules! resx { () => { Q!(res!() ", # (0 * " NUMSIZE!() ")") } } +macro_rules! resy { () => { Q!(res!() ", # (1 * " NUMSIZE!() ")") } } + +macro_rules! scalar { () => { Q!("sp, # (0 * " NUMSIZE!() ")") } } +macro_rules! bscalar { () => { Q!("sp, # (1 * " NUMSIZE!() ")") } } + +macro_rules! btabent { () => { Q!("sp, # (2 * " NUMSIZE!() ")") } } +macro_rules! acc { () => { Q!("sp, # (5 * " NUMSIZE!() ")") } } +macro_rules! acc_x { () => { Q!("sp, # (5 * " NUMSIZE!() ")") } } +macro_rules! acc_y { () => { Q!("sp, # (6 * " NUMSIZE!() ")") } } +macro_rules! acc_z { () => { Q!("sp, # (7 * " NUMSIZE!() ")") } } +macro_rules! acc_w { () => { Q!("sp, # (8 * " NUMSIZE!() ")") } } + +macro_rules! tabent { () => { Q!("sp, # (9 * " NUMSIZE!() ")") } } + +macro_rules! tab { () => { Q!("sp, # (13 * " NUMSIZE!() ")") } } + +// Total size to reserve on the stack (excluding local subroutines) + +macro_rules! 
NSPACE { () => { Q!("(45 * " NUMSIZE!() ")") } } + +// Sub-references used in local subroutines with local stack + +macro_rules! x_0 { () => { Q!(p0!() ", #0") } } +macro_rules! y_0 { () => { Q!(p0!() ", # " NUMSIZE!()) } } +macro_rules! z_0 { () => { Q!(p0!() ", # (2 * " NUMSIZE!() ")") } } +macro_rules! w_0 { () => { Q!(p0!() ", # (3 * " NUMSIZE!() ")") } } + +macro_rules! x_1 { () => { Q!(p1!() ", #0") } } +macro_rules! y_1 { () => { Q!(p1!() ", # " NUMSIZE!()) } } +macro_rules! z_1 { () => { Q!(p1!() ", # (2 * " NUMSIZE!() ")") } } +macro_rules! w_1 { () => { Q!(p1!() ", # (3 * " NUMSIZE!() ")") } } + +macro_rules! x_2 { () => { Q!(p2!() ", #0") } } +macro_rules! y_2 { () => { Q!(p2!() ", # " NUMSIZE!()) } } +macro_rules! z_2 { () => { Q!(p2!() ", # (2 * " NUMSIZE!() ")") } } +macro_rules! w_2 { () => { Q!(p2!() ", # (3 * " NUMSIZE!() ")") } } + +macro_rules! t0 { () => { Q!("sp, # (0 * " NUMSIZE!() ")") } } +macro_rules! t1 { () => { Q!("sp, # (1 * " NUMSIZE!() ")") } } +macro_rules! t2 { () => { Q!("sp, # (2 * " NUMSIZE!() ")") } } +macro_rules! t3 { () => { Q!("sp, # (3 * " NUMSIZE!() ")") } } +macro_rules! t4 { () => { Q!("sp, # (4 * " NUMSIZE!() ")") } } +macro_rules! t5 { () => { Q!("sp, # (5 * " NUMSIZE!() ")") } } + +// Load 64-bit immediate into a register + +macro_rules! movbig { + ($nn:expr, $n3:expr, $n2:expr, $n1:expr, $n0:expr) => { Q!( + "movz " $nn ", " $n0 ";\n" + "movk " $nn ", " $n1 ", lsl #16;\n" + "movk " $nn ", " $n2 ", lsl #32;\n" + "movk " $nn ", " $n3 ", lsl #48" + )} +} + +// Macro wrapping up the basic field operation bignum_mul_p25519_alt, only +// trivially different from a pure function call to that subroutine. + +macro_rules! 
mul_p25519 { + ($P0:expr, $P1:expr, $P2:expr) => { Q!( + "ldp x3, x4, [" $P1 "];\n" + "ldp x7, x8, [" $P2 "];\n" + "mul x12, x3, x7;\n" + "umulh x13, x3, x7;\n" + "mul x11, x3, x8;\n" + "umulh x14, x3, x8;\n" + "adds x13, x13, x11;\n" + "ldp x9, x10, [" $P2 "+ 16];\n" + "mul x11, x3, x9;\n" + "umulh x15, x3, x9;\n" + "adcs x14, x14, x11;\n" + "mul x11, x3, x10;\n" + "umulh x16, x3, x10;\n" + "adcs x15, x15, x11;\n" + "adc x16, x16, xzr;\n" + "ldp x5, x6, [" $P1 "+ 16];\n" + "mul x11, x4, x7;\n" + "adds x13, x13, x11;\n" + "mul x11, x4, x8;\n" + "adcs x14, x14, x11;\n" + "mul x11, x4, x9;\n" + "adcs x15, x15, x11;\n" + "mul x11, x4, x10;\n" + "adcs x16, x16, x11;\n" + "umulh x3, x4, x10;\n" + "adc x3, x3, xzr;\n" + "umulh x11, x4, x7;\n" + "adds x14, x14, x11;\n" + "umulh x11, x4, x8;\n" + "adcs x15, x15, x11;\n" + "umulh x11, x4, x9;\n" + "adcs x16, x16, x11;\n" + "adc x3, x3, xzr;\n" + "mul x11, x5, x7;\n" + "adds x14, x14, x11;\n" + "mul x11, x5, x8;\n" + "adcs x15, x15, x11;\n" + "mul x11, x5, x9;\n" + "adcs x16, x16, x11;\n" + "mul x11, x5, x10;\n" + "adcs x3, x3, x11;\n" + "umulh x4, x5, x10;\n" + "adc x4, x4, xzr;\n" + "umulh x11, x5, x7;\n" + "adds x15, x15, x11;\n" + "umulh x11, x5, x8;\n" + "adcs x16, x16, x11;\n" + "umulh x11, x5, x9;\n" + "adcs x3, x3, x11;\n" + "adc x4, x4, xzr;\n" + "mul x11, x6, x7;\n" + "adds x15, x15, x11;\n" + "mul x11, x6, x8;\n" + "adcs x16, x16, x11;\n" + "mul x11, x6, x9;\n" + "adcs x3, x3, x11;\n" + "mul x11, x6, x10;\n" + "adcs x4, x4, x11;\n" + "umulh x5, x6, x10;\n" + "adc x5, x5, xzr;\n" + "umulh x11, x6, x7;\n" + "adds x16, x16, x11;\n" + "umulh x11, x6, x8;\n" + "adcs x3, x3, x11;\n" + "umulh x11, x6, x9;\n" + "adcs x4, x4, x11;\n" + "adc x5, x5, xzr;\n" + "mov x7, #0x26;\n" + "mul x11, x7, x16;\n" + "umulh x9, x7, x16;\n" + "adds x12, x12, x11;\n" + "mul x11, x7, x3;\n" + "umulh x3, x7, x3;\n" + "adcs x13, x13, x11;\n" + "mul x11, x7, x4;\n" + "umulh x4, x7, x4;\n" + "adcs x14, x14, x11;\n" + "mul x11, x7, x5;\n" + 
"umulh x5, x7, x5;\n" + "adcs x15, x15, x11;\n" + "cset x16, cs;\n" + "adds x15, x15, x4;\n" + "adc x16, x16, x5;\n" + "cmn x15, x15;\n" + "orr x15, x15, #0x8000000000000000;\n" + "adc x8, x16, x16;\n" + "mov x7, #0x13;\n" + "madd x11, x7, x8, x7;\n" + "adds x12, x12, x11;\n" + "adcs x13, x13, x9;\n" + "adcs x14, x14, x3;\n" + "adcs x15, x15, xzr;\n" + "csel x7, x7, xzr, cc;\n" + "subs x12, x12, x7;\n" + "sbcs x13, x13, xzr;\n" + "sbcs x14, x14, xzr;\n" + "sbc x15, x15, xzr;\n" + "and x15, x15, #0x7fffffffffffffff;\n" + "stp x12, x13, [" $P0 "];\n" + "stp x14, x15, [" $P0 "+ 16]" + )} +} + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. + +macro_rules! mul_4 { + ($P0:expr, $P1:expr, $P2:expr) => { Q!( + "ldp x3, x4, [" $P1 "];\n" + "ldp x7, x8, [" $P2 "];\n" + "mul x12, x3, x7;\n" + "umulh x13, x3, x7;\n" + "mul x11, x3, x8;\n" + "umulh x14, x3, x8;\n" + "adds x13, x13, x11;\n" + "ldp x9, x10, [" $P2 "+ 16];\n" + "mul x11, x3, x9;\n" + "umulh x15, x3, x9;\n" + "adcs x14, x14, x11;\n" + "mul x11, x3, x10;\n" + "umulh x16, x3, x10;\n" + "adcs x15, x15, x11;\n" + "adc x16, x16, xzr;\n" + "ldp x5, x6, [" $P1 "+ 16];\n" + "mul x11, x4, x7;\n" + "adds x13, x13, x11;\n" + "mul x11, x4, x8;\n" + "adcs x14, x14, x11;\n" + "mul x11, x4, x9;\n" + "adcs x15, x15, x11;\n" + "mul x11, x4, x10;\n" + "adcs x16, x16, x11;\n" + "umulh x3, x4, x10;\n" + "adc x3, x3, xzr;\n" + "umulh x11, x4, x7;\n" + "adds x14, x14, x11;\n" + "umulh x11, x4, x8;\n" + "adcs x15, x15, x11;\n" + "umulh x11, x4, x9;\n" + "adcs x16, x16, x11;\n" + "adc x3, x3, xzr;\n" + "mul x11, x5, x7;\n" + "adds x14, x14, x11;\n" + "mul x11, x5, x8;\n" + "adcs x15, x15, x11;\n" + "mul x11, x5, x9;\n" + "adcs x16, x16, x11;\n" + "mul x11, x5, x10;\n" + "adcs x3, x3, x11;\n" + "umulh x4, x5, x10;\n" + "adc x4, x4, xzr;\n" + "umulh x11, x5, x7;\n" + "adds x15, x15, x11;\n" + "umulh x11, x5, x8;\n" + "adcs x16, x16, 
x11;\n" + "umulh x11, x5, x9;\n" + "adcs x3, x3, x11;\n" + "adc x4, x4, xzr;\n" + "mul x11, x6, x7;\n" + "adds x15, x15, x11;\n" + "mul x11, x6, x8;\n" + "adcs x16, x16, x11;\n" + "mul x11, x6, x9;\n" + "adcs x3, x3, x11;\n" + "mul x11, x6, x10;\n" + "adcs x4, x4, x11;\n" + "umulh x5, x6, x10;\n" + "adc x5, x5, xzr;\n" + "umulh x11, x6, x7;\n" + "adds x16, x16, x11;\n" + "umulh x11, x6, x8;\n" + "adcs x3, x3, x11;\n" + "umulh x11, x6, x9;\n" + "adcs x4, x4, x11;\n" + "adc x5, x5, xzr;\n" + "mov x7, #0x26;\n" + "mul x11, x7, x16;\n" + "umulh x9, x7, x16;\n" + "adds x12, x12, x11;\n" + "mul x11, x7, x3;\n" + "umulh x3, x7, x3;\n" + "adcs x13, x13, x11;\n" + "mul x11, x7, x4;\n" + "umulh x4, x7, x4;\n" + "adcs x14, x14, x11;\n" + "mul x11, x7, x5;\n" + "umulh x5, x7, x5;\n" + "adcs x15, x15, x11;\n" + "cset x16, cs;\n" + "adds x15, x15, x4;\n" + "adc x16, x16, x5;\n" + "cmn x15, x15;\n" + "bic x15, x15, #0x8000000000000000;\n" + "adc x8, x16, x16;\n" + "mov x7, #0x13;\n" + "mul x11, x7, x8;\n" + "adds x12, x12, x11;\n" + "adcs x13, x13, x9;\n" + "adcs x14, x14, x3;\n" + "adc x15, x15, xzr;\n" + "stp x12, x13, [" $P0 "];\n" + "stp x14, x15, [" $P0 "+ 16]" + )} +} + +// Squaring just giving a result < 2 * p_25519, which is done by +// basically skipping the +1 in the quotient estimate and the final +// optional correction. + +macro_rules! 
sqr_4 { + ($P0:expr, $P1:expr) => { Q!( + "ldp x2, x3, [" $P1 "];\n" + "mul x9, x2, x3;\n" + "umulh x10, x2, x3;\n" + "ldp x4, x5, [" $P1 "+ 16];\n" + "mul x11, x2, x5;\n" + "umulh x12, x2, x5;\n" + "mul x7, x2, x4;\n" + "umulh x6, x2, x4;\n" + "adds x10, x10, x7;\n" + "adcs x11, x11, x6;\n" + "mul x7, x3, x4;\n" + "umulh x6, x3, x4;\n" + "adc x6, x6, xzr;\n" + "adds x11, x11, x7;\n" + "mul x13, x4, x5;\n" + "umulh x14, x4, x5;\n" + "adcs x12, x12, x6;\n" + "mul x7, x3, x5;\n" + "umulh x6, x3, x5;\n" + "adc x6, x6, xzr;\n" + "adds x12, x12, x7;\n" + "adcs x13, x13, x6;\n" + "adc x14, x14, xzr;\n" + "adds x9, x9, x9;\n" + "adcs x10, x10, x10;\n" + "adcs x11, x11, x11;\n" + "adcs x12, x12, x12;\n" + "adcs x13, x13, x13;\n" + "adcs x14, x14, x14;\n" + "cset x6, cs;\n" + "umulh x7, x2, x2;\n" + "mul x8, x2, x2;\n" + "adds x9, x9, x7;\n" + "mul x7, x3, x3;\n" + "adcs x10, x10, x7;\n" + "umulh x7, x3, x3;\n" + "adcs x11, x11, x7;\n" + "mul x7, x4, x4;\n" + "adcs x12, x12, x7;\n" + "umulh x7, x4, x4;\n" + "adcs x13, x13, x7;\n" + "mul x7, x5, x5;\n" + "adcs x14, x14, x7;\n" + "umulh x7, x5, x5;\n" + "adc x6, x6, x7;\n" + "mov x3, #0x26;\n" + "mul x7, x3, x12;\n" + "umulh x4, x3, x12;\n" + "adds x8, x8, x7;\n" + "mul x7, x3, x13;\n" + "umulh x13, x3, x13;\n" + "adcs x9, x9, x7;\n" + "mul x7, x3, x14;\n" + "umulh x14, x3, x14;\n" + "adcs x10, x10, x7;\n" + "mul x7, x3, x6;\n" + "umulh x6, x3, x6;\n" + "adcs x11, x11, x7;\n" + "cset x12, cs;\n" + "adds x11, x11, x14;\n" + "adc x12, x12, x6;\n" + "cmn x11, x11;\n" + "bic x11, x11, #0x8000000000000000;\n" + "adc x2, x12, x12;\n" + "mov x3, #0x13;\n" + "mul x7, x3, x2;\n" + "adds x8, x8, x7;\n" + "adcs x9, x9, x4;\n" + "adcs x10, x10, x13;\n" + "adc x11, x11, xzr;\n" + "stp x8, x9, [" $P0 "];\n" + "stp x10, x11, [" $P0 "+ 16]" + )} +} + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +macro_rules! 
sub_twice4 { + ($P0:expr, $P1:expr, $P2:expr) => { Q!( + "ldp x5, x6, [" $P1 "];\n" + "ldp x4, x3, [" $P2 "];\n" + "subs x5, x5, x4;\n" + "sbcs x6, x6, x3;\n" + "ldp x7, x8, [" $P1 "+ 16];\n" + "ldp x4, x3, [" $P2 "+ 16];\n" + "sbcs x7, x7, x4;\n" + "sbcs x8, x8, x3;\n" + "mov x4, #38;\n" + "csel x3, x4, xzr, lo;\n" + "subs x5, x5, x3;\n" + "sbcs x6, x6, xzr;\n" + "sbcs x7, x7, xzr;\n" + "sbc x8, x8, xzr;\n" + "stp x5, x6, [" $P0 "];\n" + "stp x7, x8, [" $P0 "+ 16]" + )} +} + +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. + +macro_rules! add_twice4 { + ($P0:expr, $P1:expr, $P2:expr) => { Q!( + "ldp x3, x4, [" $P1 "];\n" + "ldp x7, x8, [" $P2 "];\n" + "adds x3, x3, x7;\n" + "adcs x4, x4, x8;\n" + "ldp x5, x6, [" $P1 "+ 16];\n" + "ldp x7, x8, [" $P2 "+ 16];\n" + "adcs x5, x5, x7;\n" + "adcs x6, x6, x8;\n" + "mov x9, #38;\n" + "csel x9, x9, xzr, cs;\n" + "adds x3, x3, x9;\n" + "adcs x4, x4, xzr;\n" + "adcs x5, x5, xzr;\n" + "adc x6, x6, xzr;\n" + "stp x3, x4, [" $P0 "];\n" + "stp x5, x6, [" $P0 "+ 16]" + )} +} + +macro_rules! double_twice4 { + ($P0:expr, $P1:expr) => { Q!( + "ldp x3, x4, [" $P1 "];\n" + "adds x3, x3, x3;\n" + "adcs x4, x4, x4;\n" + "ldp x5, x6, [" $P1 "+ 16];\n" + "adcs x5, x5, x5;\n" + "adcs x6, x6, x6;\n" + "mov x9, #38;\n" + "csel x9, x9, xzr, cs;\n" + "adds x3, x3, x9;\n" + "adcs x4, x4, xzr;\n" + "adcs x5, x5, xzr;\n" + "adc x6, x6, xzr;\n" + "stp x3, x4, [" $P0 "];\n" + "stp x5, x6, [" $P0 "+ 16]" + )} +} + +// Load the constant k_25519 = 2 * d_25519 using immediate operations + +macro_rules! 
load_k25519 { + ($P0:expr) => { Q!( + "movz x0, #0xf159;\n" + "movz x1, #0xb156;\n" + "movz x2, #0xd130;\n" + "movz x3, #0xfce7;\n" + "movk x0, #0x26b2, lsl #16;\n" + "movk x1, #0x8283, lsl #16;\n" + "movk x2, #0xeef3, lsl #16;\n" + "movk x3, #0x56df, lsl #16;\n" + "movk x0, #0x9b94, lsl #32;\n" + "movk x1, #0x149a, lsl #32;\n" + "movk x2, #0x80f2, lsl #32;\n" + "movk x3, #0xd9dc, lsl #32;\n" + "movk x0, #0xebd6, lsl #48;\n" + "movk x1, #0x00e0, lsl #48;\n" + "movk x2, #0x198e, lsl #48;\n" + "movk x3, #0x2406, lsl #48;\n" + "stp x0, x1, [" $P0 "];\n" + "stp x2, x3, [" $P0 "+ 16]" + )} +} + +/// Double scalar multiplication for edwards25519, fresh and base point +/// +/// Input scalar[4], point[8], bscalar[4]; output res[8] +/// +/// Given scalar = n, point = P and bscalar = m, returns in res +/// the point (X,Y) = n * P + m * B where B = (...,4/5) is +/// the standard basepoint for the edwards25519 (Ed25519) curve. +/// +/// Both 256-bit coordinates of the input point P are implicitly +/// reduced modulo 2^255-19 if they are not already in reduced form, +/// but the conventional usage is that they *are* already reduced. +/// The scalars can be arbitrary 256-bit numbers but may also be +/// considered as implicitly reduced modulo the group order. +pub(crate) fn edwards25519_scalarmuldouble( + res: &mut [u64; 8], + scalar: &[u64; 4], + point: &[u64; 8], + bscalar: &[u64; 4], +) { + // SAFETY: inline assembly. see [crate::low::inline_assembly_safety] for safety info. + unsafe { + core::arch::asm!( + + + // Save regs and make room for temporaries + + Q!(" stp " "x19, x20, [sp, -16] !"), + Q!(" stp " "x21, x22, [sp, -16] !"), + Q!(" stp " "x23, x24, [sp, -16] !"), + Q!(" stp " "x25, x30, [sp, -16] !"), + Q!(" sub " "sp, sp, # " NSPACE!()), + + // Move the output pointer to a stable place + + Q!(" mov " res!() ", x0"), + + // Copy scalars while recoding all 4-bit nybbles except the top + // one (bits 252..255) into signed 4-bit digits. 
This is essentially + // done just by adding the recoding constant 0x0888..888, after + // which all digits except the first have an implicit bias of -8, + // so 0 -> -8, 1 -> -7, ... 7 -> -1, 8 -> 0, 9 -> 1, ... 15 -> 7. + // (We could literally create 2s complement signed nybbles by + // XORing with the same constant 0x0888..888 afterwards, but it + // doesn't seem to make the end usage any simpler.) + // + // In order to ensure that the unrecoded top nybble (bits 252..255) + // does not become > 8 as a result of carries lower down from the + // recoding, we first (conceptually) subtract the group order iff + // the top digit of the scalar is > 2^63. In the implementation the + // reduction and recoding are combined by optionally using the + // modified recoding constant 0x0888...888 + (2^256 - group_order). + + movbig!("x4", "#0xc7f5", "#0x6fb5", "#0xa0d9", "#0xe920"), + movbig!("x5", "#0xe190", "#0xb993", "#0x70cb", "#0xa1d5"), + Q!(" mov " "x7, #0x8888888888888888"), + Q!(" sub " "x6, x7, #1"), + Q!(" bic " "x8, x7, #0xF000000000000000"), + + Q!(" ldp " "x10, x11, [x3]"), + Q!(" ldp " "x12, x13, [x3, #16]"), + Q!(" mov " "x3, 0x8000000000000000"), + Q!(" cmp " "x3, x13"), + Q!(" csel " "x14, x7, x4, cs"), + Q!(" csel " "x15, x7, x5, cs"), + Q!(" csel " "x16, x7, x6, cs"), + Q!(" csel " "x17, x8, x7, cs"), + Q!(" adds " "x10, x10, x14"), + Q!(" adcs " "x11, x11, x15"), + Q!(" adcs " "x12, x12, x16"), + Q!(" adc " "x13, x13, x17"), + Q!(" stp " "x10, x11, [" bscalar!() "]"), + Q!(" stp " "x12, x13, [" bscalar!() "+ 16]"), + + Q!(" ldp " "x10, x11, [x1]"), + Q!(" ldp " "x12, x13, [x1, #16]"), + Q!(" mov " "x3, 0x8000000000000000"), + Q!(" cmp " "x3, x13"), + Q!(" csel " "x14, x7, x4, cs"), + Q!(" csel " "x15, x7, x5, cs"), + Q!(" csel " "x16, x7, x6, cs"), + Q!(" csel " "x17, x8, x7, cs"), + Q!(" adds " "x10, x10, x14"), + Q!(" adcs " "x11, x11, x15"), + Q!(" adcs " "x12, x12, x16"), + Q!(" adc " "x13, x13, x17"), + Q!(" stp " "x10, x11, [" scalar!() "]"), + Q!(" 
stp " "x12, x13, [" scalar!() "+ 16]"), + + // Create table of multiples 1..8 of the general input point at "tab". + // Reduce the input coordinates x and y modulo 2^256 - 38 first, for the + // sake of definiteness; this is the reduction that will be maintained. + // We could slightly optimize the additions because we know the input + // point is affine (so Z = 1), but it doesn't seem worth the complication. + + Q!(" ldp " "x10, x11, [x2]"), + Q!(" ldp " "x12, x13, [x2, #16]"), + Q!(" adds " "x14, x10, #38"), + Q!(" adcs " "x15, x11, xzr"), + Q!(" adcs " "x16, x12, xzr"), + Q!(" adcs " "x17, x13, xzr"), + Q!(" csel " "x10, x14, x10, cs"), + Q!(" csel " "x11, x15, x11, cs"), + Q!(" csel " "x12, x16, x12, cs"), + Q!(" csel " "x13, x17, x13, cs"), + Q!(" stp " "x10, x11, [" tab!() "]"), + Q!(" stp " "x12, x13, [" tab!() "+ 16]"), + + Q!(" ldp " "x10, x11, [x2, #32]"), + Q!(" ldp " "x12, x13, [x2, #48]"), + Q!(" adds " "x14, x10, #38"), + Q!(" adcs " "x15, x11, xzr"), + Q!(" adcs " "x16, x12, xzr"), + Q!(" adcs " "x17, x13, xzr"), + Q!(" csel " "x10, x14, x10, cs"), + Q!(" csel " "x11, x15, x11, cs"), + Q!(" csel " "x12, x16, x12, cs"), + Q!(" csel " "x13, x17, x13, cs"), + Q!(" stp " "x10, x11, [" tab!() "+ 32]"), + Q!(" stp " "x12, x13, [" tab!() "+ 48]"), + + Q!(" mov " "x1, #1"), + Q!(" stp " "x1, xzr, [" tab!() "+ 64]"), + Q!(" stp " "xzr, xzr, [" tab!() "+ 80]"), + + Q!(" add " p0!() ", " tab!() "+ 96"), + Q!(" add " p1!() ", " tab!()), + Q!(" add " p2!() ", " tab!() "+ 32"), + mul_4!(x_0!(), x_1!(), x_2!()), + + // Multiple 2 + + Q!(" add " p0!() ", " tab!() "+ 1 * 128"), + Q!(" add " p1!() ", " tab!()), + Q!(" bl " Label!("edwards25519_scalarmuldouble_alt_epdouble", 2, After)), + + // Multiple 3 + + Q!(" add " p0!() ", " tab!() "+ 2 * 128"), + Q!(" add " p1!() ", " tab!()), + Q!(" add " p2!() ", " tab!() "+ 1 * 128"), + Q!(" bl " Label!("edwards25519_scalarmuldouble_alt_epadd", 3, After)), + + // Multiple 4 + + Q!(" add " p0!() ", " tab!() "+ 3 * 128"), + Q!(" 
add " p1!() ", " tab!() "+ 1 * 128"), + Q!(" bl " Label!("edwards25519_scalarmuldouble_alt_epdouble", 2, After)), + + // Multiple 5 + + Q!(" add " p0!() ", " tab!() "+ 4 * 128"), + Q!(" add " p1!() ", " tab!()), + Q!(" add " p2!() ", " tab!() "+ 3 * 128"), + Q!(" bl " Label!("edwards25519_scalarmuldouble_alt_epadd", 3, After)), + + // Multiple 6 + + Q!(" add " p0!() ", " tab!() "+ 5 * 128"), + Q!(" add " p1!() ", " tab!() "+ 2 * 128"), + Q!(" bl " Label!("edwards25519_scalarmuldouble_alt_epdouble", 2, After)), + + // Multiple 7 + + Q!(" add " p0!() ", " tab!() "+ 6 * 128"), + Q!(" add " p1!() ", " tab!()), + Q!(" add " p2!() ", " tab!() "+ 5 * 128"), + Q!(" bl " Label!("edwards25519_scalarmuldouble_alt_epadd", 3, After)), + + // Multiple 8 + + Q!(" add " p0!() ", " tab!() "+ 7 * 128"), + Q!(" add " p1!() ", " tab!() "+ 3 * 128"), + Q!(" bl " Label!("edwards25519_scalarmuldouble_alt_epdouble", 2, After)), + + // Handle the initialization, starting the loop counter at i = 252 + // and initializing acc to the sum of the table entries for the + // top nybbles of the scalars (the ones with no implicit -8 bias). + + Q!(" mov " i!() ", #252"), + + // Index for btable entry... 
+ + Q!(" ldr " "x0, [" bscalar!() "+ 24]"), + Q!(" lsr " bf!() ", x0, #60"), + + // ...and constant-time indexing based on that index + + Q!(" adrp " "x14, " PageRef!("edwards25519_scalarmuldouble_alt_table")), + + Q!(" mov " "x0, #1"), + Q!(" mov " "x1, xzr"), + Q!(" mov " "x2, xzr"), + Q!(" mov " "x3, xzr"), + Q!(" mov " "x4, #1"), + Q!(" mov " "x5, xzr"), + Q!(" mov " "x6, xzr"), + Q!(" mov " "x7, xzr"), + Q!(" mov " "x8, xzr"), + Q!(" mov " "x9, xzr"), + Q!(" mov " "x10, xzr"), + Q!(" mov " "x11, xzr"), + + Q!(" cmp " bf!() ", #1"), + Q!(" ldp " "x12, x13, [x14]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " "x14, x14, #96"), + + Q!(" cmp " bf!() ", #2"), + Q!(" ldp " "x12, x13, [x14]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " "x14, x14, #96"), + + Q!(" cmp " bf!() ", #3"), + Q!(" ldp " "x12, x13, [x14]"), + Q!(" csel " "x0, x0, x12, 
ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " "x14, x14, #96"), + + Q!(" cmp " bf!() ", #4"), + Q!(" ldp " "x12, x13, [x14]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " "x14, x14, #96"), + + Q!(" cmp " bf!() ", #5"), + Q!(" ldp " "x12, x13, [x14]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " "x14, x14, #96"), + 
+ Q!(" cmp " bf!() ", #6"), + Q!(" ldp " "x12, x13, [x14]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " "x14, x14, #96"), + + Q!(" cmp " bf!() ", #7"), + Q!(" ldp " "x12, x13, [x14]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " "x14, x14, #96"), + + Q!(" cmp " bf!() ", #8"), + Q!(" ldp " "x12, x13, [x14]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #80]"), + Q!(" csel " 
"x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + + Q!(" stp " "x0, x1, [" btabent!() "]"), + Q!(" stp " "x2, x3, [" btabent!() "+ 16]"), + Q!(" stp " "x4, x5, [" btabent!() "+ 32]"), + Q!(" stp " "x6, x7, [" btabent!() "+ 48]"), + Q!(" stp " "x8, x9, [" btabent!() "+ 64]"), + Q!(" stp " "x10, x11, [" btabent!() "+ 80]"), + + // Index for table entry... + + Q!(" ldr " "x0, [" scalar!() "+ 24]"), + Q!(" lsr " bf!() ", x0, #60"), + + // ...and constant-time indexing based on that index + + Q!(" add " p0!() ", " tab!()), + + Q!(" mov " "x0, xzr"), + Q!(" mov " "x1, xzr"), + Q!(" mov " "x2, xzr"), + Q!(" mov " "x3, xzr"), + Q!(" mov " "x4, #1"), + Q!(" mov " "x5, xzr"), + Q!(" mov " "x6, xzr"), + Q!(" mov " "x7, xzr"), + Q!(" mov " "x8, #1"), + Q!(" mov " "x9, xzr"), + Q!(" mov " "x10, xzr"), + Q!(" mov " "x11, xzr"), + Q!(" mov " "x12, xzr"), + Q!(" mov " "x13, xzr"), + Q!(" mov " "x14, xzr"), + Q!(" mov " "x15, xzr"), + + Q!(" cmp " bf!() ", #1"), + Q!(" ldp " "x16, x17, [" p0!() "]"), + Q!(" csel " "x0, x0, x16, ne"), + Q!(" csel " "x1, x1, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #16]"), + Q!(" csel " "x2, x2, x16, ne"), + Q!(" csel " "x3, x3, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #32]"), + Q!(" csel " "x4, x4, x16, ne"), + Q!(" csel " "x5, x5, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #48]"), + Q!(" csel " "x6, x6, x16, ne"), + Q!(" csel " "x7, x7, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #64]"), + Q!(" csel " "x8, x8, x16, ne"), + Q!(" csel " "x9, x9, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #80]"), + Q!(" csel " "x10, x10, x16, ne"), + Q!(" csel " "x11, x11, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #96]"), + Q!(" csel " "x12, x12, x16, ne"), + Q!(" csel " "x13, x13, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #112]"), + Q!(" csel " "x14, x14, x16, ne"), + Q!(" csel " "x15, x15, x17, ne"), + Q!(" add " p0!() ", " p0!() ", #128"), + + Q!(" cmp " bf!() ", #2"), + Q!(" ldp " "x16, x17, [" p0!() "]"), + Q!(" csel " 
"x0, x0, x16, ne"), + Q!(" csel " "x1, x1, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #16]"), + Q!(" csel " "x2, x2, x16, ne"), + Q!(" csel " "x3, x3, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #32]"), + Q!(" csel " "x4, x4, x16, ne"), + Q!(" csel " "x5, x5, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #48]"), + Q!(" csel " "x6, x6, x16, ne"), + Q!(" csel " "x7, x7, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #64]"), + Q!(" csel " "x8, x8, x16, ne"), + Q!(" csel " "x9, x9, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #80]"), + Q!(" csel " "x10, x10, x16, ne"), + Q!(" csel " "x11, x11, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #96]"), + Q!(" csel " "x12, x12, x16, ne"), + Q!(" csel " "x13, x13, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #112]"), + Q!(" csel " "x14, x14, x16, ne"), + Q!(" csel " "x15, x15, x17, ne"), + Q!(" add " p0!() ", " p0!() ", #128"), + + Q!(" cmp " bf!() ", #3"), + Q!(" ldp " "x16, x17, [" p0!() "]"), + Q!(" csel " "x0, x0, x16, ne"), + Q!(" csel " "x1, x1, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #16]"), + Q!(" csel " "x2, x2, x16, ne"), + Q!(" csel " "x3, x3, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #32]"), + Q!(" csel " "x4, x4, x16, ne"), + Q!(" csel " "x5, x5, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #48]"), + Q!(" csel " "x6, x6, x16, ne"), + Q!(" csel " "x7, x7, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #64]"), + Q!(" csel " "x8, x8, x16, ne"), + Q!(" csel " "x9, x9, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #80]"), + Q!(" csel " "x10, x10, x16, ne"), + Q!(" csel " "x11, x11, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #96]"), + Q!(" csel " "x12, x12, x16, ne"), + Q!(" csel " "x13, x13, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #112]"), + Q!(" csel " "x14, x14, x16, ne"), + Q!(" csel " "x15, x15, x17, ne"), + Q!(" add " p0!() ", " p0!() ", #128"), + + Q!(" cmp " bf!() ", #4"), + Q!(" ldp " "x16, x17, [" p0!() "]"), + Q!(" csel " "x0, x0, x16, ne"), + Q!(" csel " "x1, x1, x17, 
ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #16]"), + Q!(" csel " "x2, x2, x16, ne"), + Q!(" csel " "x3, x3, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #32]"), + Q!(" csel " "x4, x4, x16, ne"), + Q!(" csel " "x5, x5, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #48]"), + Q!(" csel " "x6, x6, x16, ne"), + Q!(" csel " "x7, x7, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #64]"), + Q!(" csel " "x8, x8, x16, ne"), + Q!(" csel " "x9, x9, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #80]"), + Q!(" csel " "x10, x10, x16, ne"), + Q!(" csel " "x11, x11, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #96]"), + Q!(" csel " "x12, x12, x16, ne"), + Q!(" csel " "x13, x13, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #112]"), + Q!(" csel " "x14, x14, x16, ne"), + Q!(" csel " "x15, x15, x17, ne"), + Q!(" add " p0!() ", " p0!() ", #128"), + + Q!(" cmp " bf!() ", #5"), + Q!(" ldp " "x16, x17, [" p0!() "]"), + Q!(" csel " "x0, x0, x16, ne"), + Q!(" csel " "x1, x1, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #16]"), + Q!(" csel " "x2, x2, x16, ne"), + Q!(" csel " "x3, x3, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #32]"), + Q!(" csel " "x4, x4, x16, ne"), + Q!(" csel " "x5, x5, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #48]"), + Q!(" csel " "x6, x6, x16, ne"), + Q!(" csel " "x7, x7, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #64]"), + Q!(" csel " "x8, x8, x16, ne"), + Q!(" csel " "x9, x9, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #80]"), + Q!(" csel " "x10, x10, x16, ne"), + Q!(" csel " "x11, x11, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #96]"), + Q!(" csel " "x12, x12, x16, ne"), + Q!(" csel " "x13, x13, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #112]"), + Q!(" csel " "x14, x14, x16, ne"), + Q!(" csel " "x15, x15, x17, ne"), + Q!(" add " p0!() ", " p0!() ", #128"), + + Q!(" cmp " bf!() ", #6"), + Q!(" ldp " "x16, x17, [" p0!() "]"), + Q!(" csel " "x0, x0, x16, ne"), + Q!(" csel " "x1, x1, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #16]"), 
+ Q!(" csel " "x2, x2, x16, ne"), + Q!(" csel " "x3, x3, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #32]"), + Q!(" csel " "x4, x4, x16, ne"), + Q!(" csel " "x5, x5, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #48]"), + Q!(" csel " "x6, x6, x16, ne"), + Q!(" csel " "x7, x7, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #64]"), + Q!(" csel " "x8, x8, x16, ne"), + Q!(" csel " "x9, x9, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #80]"), + Q!(" csel " "x10, x10, x16, ne"), + Q!(" csel " "x11, x11, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #96]"), + Q!(" csel " "x12, x12, x16, ne"), + Q!(" csel " "x13, x13, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #112]"), + Q!(" csel " "x14, x14, x16, ne"), + Q!(" csel " "x15, x15, x17, ne"), + Q!(" add " p0!() ", " p0!() ", #128"), + + Q!(" cmp " bf!() ", #7"), + Q!(" ldp " "x16, x17, [" p0!() "]"), + Q!(" csel " "x0, x0, x16, ne"), + Q!(" csel " "x1, x1, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #16]"), + Q!(" csel " "x2, x2, x16, ne"), + Q!(" csel " "x3, x3, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #32]"), + Q!(" csel " "x4, x4, x16, ne"), + Q!(" csel " "x5, x5, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #48]"), + Q!(" csel " "x6, x6, x16, ne"), + Q!(" csel " "x7, x7, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #64]"), + Q!(" csel " "x8, x8, x16, ne"), + Q!(" csel " "x9, x9, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #80]"), + Q!(" csel " "x10, x10, x16, ne"), + Q!(" csel " "x11, x11, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #96]"), + Q!(" csel " "x12, x12, x16, ne"), + Q!(" csel " "x13, x13, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #112]"), + Q!(" csel " "x14, x14, x16, ne"), + Q!(" csel " "x15, x15, x17, ne"), + Q!(" add " p0!() ", " p0!() ", #128"), + + Q!(" cmp " bf!() ", #8"), + Q!(" ldp " "x16, x17, [" p0!() "]"), + Q!(" csel " "x0, x0, x16, ne"), + Q!(" csel " "x1, x1, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #16]"), + Q!(" csel " "x2, x2, x16, ne"), + Q!(" csel " 
"x3, x3, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #32]"), + Q!(" csel " "x4, x4, x16, ne"), + Q!(" csel " "x5, x5, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #48]"), + Q!(" csel " "x6, x6, x16, ne"), + Q!(" csel " "x7, x7, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #64]"), + Q!(" csel " "x8, x8, x16, ne"), + Q!(" csel " "x9, x9, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #80]"), + Q!(" csel " "x10, x10, x16, ne"), + Q!(" csel " "x11, x11, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #96]"), + Q!(" csel " "x12, x12, x16, ne"), + Q!(" csel " "x13, x13, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #112]"), + Q!(" csel " "x14, x14, x16, ne"), + Q!(" csel " "x15, x15, x17, ne"), + + Q!(" stp " "x0, x1, [" tabent!() "]"), + Q!(" stp " "x2, x3, [" tabent!() "+ 16]"), + Q!(" stp " "x4, x5, [" tabent!() "+ 32]"), + Q!(" stp " "x6, x7, [" tabent!() "+ 48]"), + Q!(" stp " "x8, x9, [" tabent!() "+ 64]"), + Q!(" stp " "x10, x11, [" tabent!() "+ 80]"), + Q!(" stp " "x12, x13, [" tabent!() "+ 96]"), + Q!(" stp " "x14, x15, [" tabent!() "+ 112]"), + + // Add those elements to initialize the accumulator for bit position 252 + + Q!(" add " p0!() ", " acc!()), + Q!(" add " p1!() ", " tabent!()), + Q!(" add " p2!() ", " btabent!()), + Q!(" bl " Label!("edwards25519_scalarmuldouble_alt_pepadd", 4, After)), + + // Main loop with acc = [scalar/2^i] * point + [bscalar/2^i] * basepoint + // Start with i = 252 for bits 248..251 and go down four at a time to 3..0 + + Q!(Label!("edwards25519_scalarmuldouble_alt_loop", 5) ":"), + + Q!(" sub " i!() ", " i!() ", #4"), + + // Double to acc' = 2 * acc + + Q!(" add " p0!() ", " acc!()), + Q!(" add " p1!() ", " acc!()), + Q!(" bl " Label!("edwards25519_scalarmuldouble_alt_pdouble", 6, After)), + + // Get btable entry, first getting the adjusted bitfield... 
+ + Q!(" lsr " "x0, " i!() ", #6"), + Q!(" add " "x1, " bscalar!()), + Q!(" ldr " "x2, [x1, x0, lsl #3]"), + Q!(" lsr " "x3, x2, " i!()), + Q!(" and " "x0, x3, #15"), + Q!(" subs " bf!() ", x0, #8"), + Q!(" cneg " bf!() ", " bf!() ", cc"), + Q!(" csetm " cf!() ", cc"), + + // ... then doing constant-time lookup with the appropriate index... + + Q!(" adrp " "x14, " PageRef!("edwards25519_scalarmuldouble_alt_table")), + + Q!(" mov " "x0, #1"), + Q!(" mov " "x1, xzr"), + Q!(" mov " "x2, xzr"), + Q!(" mov " "x3, xzr"), + Q!(" mov " "x4, #1"), + Q!(" mov " "x5, xzr"), + Q!(" mov " "x6, xzr"), + Q!(" mov " "x7, xzr"), + Q!(" mov " "x8, xzr"), + Q!(" mov " "x9, xzr"), + Q!(" mov " "x10, xzr"), + Q!(" mov " "x11, xzr"), + + Q!(" cmp " bf!() ", #1"), + Q!(" ldp " "x12, x13, [x14]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " "x14, x14, #96"), + + Q!(" cmp " bf!() ", #2"), + Q!(" ldp " "x12, x13, [x14]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, 
[x14, #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " "x14, x14, #96"), + + Q!(" cmp " bf!() ", #3"), + Q!(" ldp " "x12, x13, [x14]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " "x14, x14, #96"), + + Q!(" cmp " bf!() ", #4"), + Q!(" ldp " "x12, x13, [x14]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " "x14, x14, #96"), + + Q!(" cmp " bf!() ", #5"), + Q!(" ldp " "x12, x13, [x14]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #64]"), + 
Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " "x14, x14, #96"), + + Q!(" cmp " bf!() ", #6"), + Q!(" ldp " "x12, x13, [x14]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " "x14, x14, #96"), + + Q!(" cmp " bf!() ", #7"), + Q!(" ldp " "x12, x13, [x14]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #48]"), + Q!(" csel " "x6, x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + Q!(" add " "x14, x14, #96"), + + Q!(" cmp " bf!() ", #8"), + Q!(" ldp " "x12, x13, [x14]"), + Q!(" csel " "x0, x0, x12, ne"), + Q!(" csel " "x1, x1, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #16]"), + Q!(" csel " "x2, x2, x12, ne"), + Q!(" csel " "x3, x3, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #32]"), + Q!(" csel " "x4, x4, x12, ne"), + Q!(" csel " "x5, x5, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #48]"), + Q!(" csel " "x6, 
x6, x12, ne"), + Q!(" csel " "x7, x7, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #64]"), + Q!(" csel " "x8, x8, x12, ne"), + Q!(" csel " "x9, x9, x13, ne"), + Q!(" ldp " "x12, x13, [x14, #80]"), + Q!(" csel " "x10, x10, x12, ne"), + Q!(" csel " "x11, x11, x13, ne"), + + // ... then optionally negating before storing. The table entry + // is in precomputed form and we currently have + // + // [x3;x2;x1;x0] = y - x + // [x7;x6;x5;x4] = x + y + // [x11;x10;x9;x8] = 2 * d * x * y + // + // Negation for Edwards curves is -(x,y) = (-x,y), which in this modified + // form amounts to swapping the first two fields and negating the third. + // The negation does not always fully reduce even mod 2^256-38 in the zero + // case, instead giving -0 = 2^256-38. But that is fine since the result is + // always fed to a multiplication inside the "pepadd" function below that + // handles any 256-bit input. + + Q!(" cmp " cf!() ", xzr"), + + Q!(" csel " "x12, x0, x4, eq"), + Q!(" csel " "x4, x0, x4, ne"), + Q!(" csel " "x13, x1, x5, eq"), + Q!(" csel " "x5, x1, x5, ne"), + Q!(" csel " "x14, x2, x6, eq"), + Q!(" csel " "x6, x2, x6, ne"), + Q!(" csel " "x15, x3, x7, eq"), + Q!(" csel " "x7, x3, x7, ne"), + + Q!(" eor " "x8, x8, " cf!()), + Q!(" eor " "x9, x9, " cf!()), + Q!(" eor " "x10, x10, " cf!()), + Q!(" eor " "x11, x11, " cf!()), + Q!(" mov " "x0, #37"), + Q!(" and " "x0, x0, " cf!()), + Q!(" subs " "x8, x8, x0"), + Q!(" sbcs " "x9, x9, xzr"), + Q!(" sbcs " "x10, x10, xzr"), + Q!(" sbc " "x11, x11, xzr"), + + Q!(" stp " "x12, x13, [" btabent!() "]"), + Q!(" stp " "x14, x15, [" btabent!() "+ 16]"), + Q!(" stp " "x4, x5, [" btabent!() "+ 32]"), + Q!(" stp " "x6, x7, [" btabent!() "+ 48]"), + Q!(" stp " "x8, x9, [" btabent!() "+ 64]"), + Q!(" stp " "x10, x11, [" btabent!() "+ 80]"), + + // Get table entry, first getting the adjusted bitfield... 
+ + Q!(" lsr " "x0, " i!() ", #6"), + Q!(" ldr " "x1, [sp, x0, lsl #3]"), + Q!(" lsr " "x2, x1, " i!()), + Q!(" and " "x0, x2, #15"), + Q!(" subs " bf!() ", x0, #8"), + Q!(" cneg " bf!() ", " bf!() ", cc"), + Q!(" csetm " cf!() ", cc"), + + // ... then getting the unadjusted table entry + + Q!(" add " p0!() ", " tab!()), + + Q!(" mov " "x0, xzr"), + Q!(" mov " "x1, xzr"), + Q!(" mov " "x2, xzr"), + Q!(" mov " "x3, xzr"), + Q!(" mov " "x4, #1"), + Q!(" mov " "x5, xzr"), + Q!(" mov " "x6, xzr"), + Q!(" mov " "x7, xzr"), + Q!(" mov " "x8, #1"), + Q!(" mov " "x9, xzr"), + Q!(" mov " "x10, xzr"), + Q!(" mov " "x11, xzr"), + Q!(" mov " "x12, xzr"), + Q!(" mov " "x13, xzr"), + Q!(" mov " "x14, xzr"), + Q!(" mov " "x15, xzr"), + + Q!(" cmp " bf!() ", #1"), + Q!(" ldp " "x16, x17, [" p0!() "]"), + Q!(" csel " "x0, x0, x16, ne"), + Q!(" csel " "x1, x1, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #16]"), + Q!(" csel " "x2, x2, x16, ne"), + Q!(" csel " "x3, x3, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #32]"), + Q!(" csel " "x4, x4, x16, ne"), + Q!(" csel " "x5, x5, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #48]"), + Q!(" csel " "x6, x6, x16, ne"), + Q!(" csel " "x7, x7, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #64]"), + Q!(" csel " "x8, x8, x16, ne"), + Q!(" csel " "x9, x9, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #80]"), + Q!(" csel " "x10, x10, x16, ne"), + Q!(" csel " "x11, x11, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #96]"), + Q!(" csel " "x12, x12, x16, ne"), + Q!(" csel " "x13, x13, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #112]"), + Q!(" csel " "x14, x14, x16, ne"), + Q!(" csel " "x15, x15, x17, ne"), + Q!(" add " p0!() ", " p0!() ", #128"), + + Q!(" cmp " bf!() ", #2"), + Q!(" ldp " "x16, x17, [" p0!() "]"), + Q!(" csel " "x0, x0, x16, ne"), + Q!(" csel " "x1, x1, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #16]"), + Q!(" csel " "x2, x2, x16, ne"), + Q!(" csel " "x3, x3, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #32]"), + 
Q!(" csel " "x4, x4, x16, ne"), + Q!(" csel " "x5, x5, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #48]"), + Q!(" csel " "x6, x6, x16, ne"), + Q!(" csel " "x7, x7, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #64]"), + Q!(" csel " "x8, x8, x16, ne"), + Q!(" csel " "x9, x9, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #80]"), + Q!(" csel " "x10, x10, x16, ne"), + Q!(" csel " "x11, x11, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #96]"), + Q!(" csel " "x12, x12, x16, ne"), + Q!(" csel " "x13, x13, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #112]"), + Q!(" csel " "x14, x14, x16, ne"), + Q!(" csel " "x15, x15, x17, ne"), + Q!(" add " p0!() ", " p0!() ", #128"), + + Q!(" cmp " bf!() ", #3"), + Q!(" ldp " "x16, x17, [" p0!() "]"), + Q!(" csel " "x0, x0, x16, ne"), + Q!(" csel " "x1, x1, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #16]"), + Q!(" csel " "x2, x2, x16, ne"), + Q!(" csel " "x3, x3, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #32]"), + Q!(" csel " "x4, x4, x16, ne"), + Q!(" csel " "x5, x5, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #48]"), + Q!(" csel " "x6, x6, x16, ne"), + Q!(" csel " "x7, x7, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #64]"), + Q!(" csel " "x8, x8, x16, ne"), + Q!(" csel " "x9, x9, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #80]"), + Q!(" csel " "x10, x10, x16, ne"), + Q!(" csel " "x11, x11, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #96]"), + Q!(" csel " "x12, x12, x16, ne"), + Q!(" csel " "x13, x13, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #112]"), + Q!(" csel " "x14, x14, x16, ne"), + Q!(" csel " "x15, x15, x17, ne"), + Q!(" add " p0!() ", " p0!() ", #128"), + + Q!(" cmp " bf!() ", #4"), + Q!(" ldp " "x16, x17, [" p0!() "]"), + Q!(" csel " "x0, x0, x16, ne"), + Q!(" csel " "x1, x1, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #16]"), + Q!(" csel " "x2, x2, x16, ne"), + Q!(" csel " "x3, x3, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #32]"), + Q!(" csel " "x4, x4, x16, ne"), + Q!(" csel " 
"x5, x5, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #48]"), + Q!(" csel " "x6, x6, x16, ne"), + Q!(" csel " "x7, x7, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #64]"), + Q!(" csel " "x8, x8, x16, ne"), + Q!(" csel " "x9, x9, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #80]"), + Q!(" csel " "x10, x10, x16, ne"), + Q!(" csel " "x11, x11, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #96]"), + Q!(" csel " "x12, x12, x16, ne"), + Q!(" csel " "x13, x13, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #112]"), + Q!(" csel " "x14, x14, x16, ne"), + Q!(" csel " "x15, x15, x17, ne"), + Q!(" add " p0!() ", " p0!() ", #128"), + + Q!(" cmp " bf!() ", #5"), + Q!(" ldp " "x16, x17, [" p0!() "]"), + Q!(" csel " "x0, x0, x16, ne"), + Q!(" csel " "x1, x1, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #16]"), + Q!(" csel " "x2, x2, x16, ne"), + Q!(" csel " "x3, x3, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #32]"), + Q!(" csel " "x4, x4, x16, ne"), + Q!(" csel " "x5, x5, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #48]"), + Q!(" csel " "x6, x6, x16, ne"), + Q!(" csel " "x7, x7, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #64]"), + Q!(" csel " "x8, x8, x16, ne"), + Q!(" csel " "x9, x9, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #80]"), + Q!(" csel " "x10, x10, x16, ne"), + Q!(" csel " "x11, x11, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #96]"), + Q!(" csel " "x12, x12, x16, ne"), + Q!(" csel " "x13, x13, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #112]"), + Q!(" csel " "x14, x14, x16, ne"), + Q!(" csel " "x15, x15, x17, ne"), + Q!(" add " p0!() ", " p0!() ", #128"), + + Q!(" cmp " bf!() ", #6"), + Q!(" ldp " "x16, x17, [" p0!() "]"), + Q!(" csel " "x0, x0, x16, ne"), + Q!(" csel " "x1, x1, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #16]"), + Q!(" csel " "x2, x2, x16, ne"), + Q!(" csel " "x3, x3, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #32]"), + Q!(" csel " "x4, x4, x16, ne"), + Q!(" csel " "x5, x5, x17, ne"), + Q!(" ldp " "x16, x17, [" 
p0!() ", #48]"), + Q!(" csel " "x6, x6, x16, ne"), + Q!(" csel " "x7, x7, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #64]"), + Q!(" csel " "x8, x8, x16, ne"), + Q!(" csel " "x9, x9, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #80]"), + Q!(" csel " "x10, x10, x16, ne"), + Q!(" csel " "x11, x11, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #96]"), + Q!(" csel " "x12, x12, x16, ne"), + Q!(" csel " "x13, x13, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #112]"), + Q!(" csel " "x14, x14, x16, ne"), + Q!(" csel " "x15, x15, x17, ne"), + Q!(" add " p0!() ", " p0!() ", #128"), + + Q!(" cmp " bf!() ", #7"), + Q!(" ldp " "x16, x17, [" p0!() "]"), + Q!(" csel " "x0, x0, x16, ne"), + Q!(" csel " "x1, x1, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #16]"), + Q!(" csel " "x2, x2, x16, ne"), + Q!(" csel " "x3, x3, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #32]"), + Q!(" csel " "x4, x4, x16, ne"), + Q!(" csel " "x5, x5, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #48]"), + Q!(" csel " "x6, x6, x16, ne"), + Q!(" csel " "x7, x7, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #64]"), + Q!(" csel " "x8, x8, x16, ne"), + Q!(" csel " "x9, x9, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #80]"), + Q!(" csel " "x10, x10, x16, ne"), + Q!(" csel " "x11, x11, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #96]"), + Q!(" csel " "x12, x12, x16, ne"), + Q!(" csel " "x13, x13, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #112]"), + Q!(" csel " "x14, x14, x16, ne"), + Q!(" csel " "x15, x15, x17, ne"), + Q!(" add " p0!() ", " p0!() ", #128"), + + Q!(" cmp " bf!() ", #8"), + Q!(" ldp " "x16, x17, [" p0!() "]"), + Q!(" csel " "x0, x0, x16, ne"), + Q!(" csel " "x1, x1, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #16]"), + Q!(" csel " "x2, x2, x16, ne"), + Q!(" csel " "x3, x3, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #32]"), + Q!(" csel " "x4, x4, x16, ne"), + Q!(" csel " "x5, x5, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #48]"), + Q!(" csel " "x6, x6, x16, 
ne"), + Q!(" csel " "x7, x7, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #64]"), + Q!(" csel " "x8, x8, x16, ne"), + Q!(" csel " "x9, x9, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #80]"), + Q!(" csel " "x10, x10, x16, ne"), + Q!(" csel " "x11, x11, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #96]"), + Q!(" csel " "x12, x12, x16, ne"), + Q!(" csel " "x13, x13, x17, ne"), + Q!(" ldp " "x16, x17, [" p0!() ", #112]"), + Q!(" csel " "x14, x14, x16, ne"), + Q!(" csel " "x15, x15, x17, ne"), + + // ... then optionally negating before storing. This time the table + // entry is extended-projective, and is in registers thus: + // + // [x3;x2;x1;x0] = X + // [x7;x6;x5;x4] = Y + // [x11;x10;x9;x8] = Z + // [x15;x14;x13;x12] = W + // + // This time we just need to negate the X and the W fields. + // The crude way negation is done can result in values of X or W + // (when initially zero before negation) being exactly equal to + // 2^256-38, but the "pepadd" function handles that correctly. 
+ + Q!(" eor " "x0, x0, " cf!()), + Q!(" eor " "x1, x1, " cf!()), + Q!(" eor " "x2, x2, " cf!()), + Q!(" eor " "x3, x3, " cf!()), + Q!(" mov " "x16, #37"), + Q!(" and " "x16, x16, " cf!()), + Q!(" subs " "x0, x0, x16"), + Q!(" sbcs " "x1, x1, xzr"), + Q!(" sbcs " "x2, x2, xzr"), + Q!(" sbc " "x3, x3, xzr"), + + Q!(" eor " "x12, x12, " cf!()), + Q!(" eor " "x13, x13, " cf!()), + Q!(" eor " "x14, x14, " cf!()), + Q!(" eor " "x15, x15, " cf!()), + Q!(" subs " "x12, x12, x16"), + Q!(" sbcs " "x13, x13, xzr"), + Q!(" sbcs " "x14, x14, xzr"), + Q!(" sbc " "x15, x15, xzr"), + + Q!(" stp " "x0, x1, [" tabent!() "]"), + Q!(" stp " "x2, x3, [" tabent!() "+ 16]"), + Q!(" stp " "x4, x5, [" tabent!() "+ 32]"), + Q!(" stp " "x6, x7, [" tabent!() "+ 48]"), + Q!(" stp " "x8, x9, [" tabent!() "+ 64]"), + Q!(" stp " "x10, x11, [" tabent!() "+ 80]"), + Q!(" stp " "x12, x13, [" tabent!() "+ 96]"), + Q!(" stp " "x14, x15, [" tabent!() "+ 112]"), + + // Double to acc' = 4 * acc + + Q!(" add " p0!() ", " acc!()), + Q!(" add " p1!() ", " acc!()), + Q!(" bl " Label!("edwards25519_scalarmuldouble_alt_pdouble", 6, After)), + + // Add tabent := tabent + btabent + + Q!(" add " p0!() ", " tabent!()), + Q!(" add " p1!() ", " tabent!()), + Q!(" add " p2!() ", " btabent!()), + Q!(" bl " Label!("edwards25519_scalarmuldouble_alt_pepadd", 4, After)), + + // Double to acc' = 8 * acc + + Q!(" add " p0!() ", " acc!()), + Q!(" add " p1!() ", " acc!()), + Q!(" bl " Label!("edwards25519_scalarmuldouble_alt_pdouble", 6, After)), + + // Double to acc' = 16 * acc + + Q!(" add " p0!() ", " acc!()), + Q!(" add " p1!() ", " acc!()), + Q!(" bl " Label!("edwards25519_scalarmuldouble_alt_epdouble", 2, After)), + + // Add table entry, acc := acc + tabent + + Q!(" add " p0!() ", " acc!()), + Q!(" add " p1!() ", " acc!()), + Q!(" add " p2!() ", " tabent!()), + Q!(" bl " Label!("edwards25519_scalarmuldouble_alt_epadd", 3, After)), + + // Loop down + + Q!(" cbnz " i!() ", " 
Label!("edwards25519_scalarmuldouble_alt_loop", 5, Before)), + + // Modular inverse setup + + Q!(" add " "x0, " tabent!()), + Q!(" add " "x1, " acc!() "+ 64"), + + // Inline copy of bignum_inv_p25519, identical except for stripping out + // the prologue and epilogue saving and restoring registers and making + // and reclaiming room on the stack. For more details and explanations see + // "arm/curve25519/bignum_inv_p25519.S". Note that the stack it uses for + // its own temporaries is 128 bytes, so it has no effect on variables + // that are needed in the rest of our computation here: res, acc, tabent. + + Q!(" mov " "x20, x0"), + Q!(" mov " "x10, #0xffffffffffffffed"), + Q!(" mov " "x11, #0xffffffffffffffff"), + Q!(" stp " "x10, x11, [sp]"), + Q!(" mov " "x12, #0x7fffffffffffffff"), + Q!(" stp " "x11, x12, [sp, #16]"), + Q!(" ldp " "x2, x3, [x1]"), + Q!(" ldp " "x4, x5, [x1, #16]"), + Q!(" mov " "x7, #0x13"), + Q!(" lsr " "x6, x5, #63"), + Q!(" madd " "x6, x7, x6, x7"), + Q!(" adds " "x2, x2, x6"), + Q!(" adcs " "x3, x3, xzr"), + Q!(" adcs " "x4, x4, xzr"), + Q!(" orr " "x5, x5, #0x8000000000000000"), + Q!(" adcs " "x5, x5, xzr"), + Q!(" csel " "x6, x7, xzr, cc"), + Q!(" subs " "x2, x2, x6"), + Q!(" sbcs " "x3, x3, xzr"), + Q!(" sbcs " "x4, x4, xzr"), + Q!(" sbc " "x5, x5, xzr"), + Q!(" and " "x5, x5, #0x7fffffffffffffff"), + Q!(" stp " "x2, x3, [sp, #32]"), + Q!(" stp " "x4, x5, [sp, #48]"), + Q!(" stp " "xzr, xzr, [sp, #64]"), + Q!(" stp " "xzr, xzr, [sp, #80]"), + Q!(" mov " "x10, #0x2099"), + Q!(" movk " "x10, #0x7502, lsl #16"), + Q!(" movk " "x10, #0x9e23, lsl #32"), + Q!(" movk " "x10, #0xa0f9, lsl #48"), + Q!(" mov " "x11, #0x2595"), + Q!(" movk " "x11, #0x1d13, lsl #16"), + Q!(" movk " "x11, #0x8f3f, lsl #32"), + Q!(" movk " "x11, #0xa8c6, lsl #48"), + Q!(" mov " "x12, #0x5242"), + Q!(" movk " "x12, #0x5ac, lsl #16"), + Q!(" movk " "x12, #0x8938, lsl #32"), + Q!(" movk " "x12, #0x6c6c, lsl #48"), + Q!(" mov " "x13, #0x615"), + Q!(" movk " "x13, #0x4177, 
lsl #16"), + Q!(" movk " "x13, #0x8b2, lsl #32"), + Q!(" movk " "x13, #0x2765, lsl #48"), + Q!(" stp " "x10, x11, [sp, #96]"), + Q!(" stp " "x12, x13, [sp, #112]"), + Q!(" mov " "x21, #0xa"), + Q!(" mov " "x22, #0x1"), + Q!(" b " Label!("edwards25519_scalarmuldouble_alt_invmidloop", 7, After)), + Q!(Label!("edwards25519_scalarmuldouble_alt_invloop", 8) ":"), + Q!(" cmp " "x10, xzr"), + Q!(" csetm " "x14, mi"), + Q!(" cneg " "x10, x10, mi"), + Q!(" cmp " "x11, xzr"), + Q!(" csetm " "x15, mi"), + Q!(" cneg " "x11, x11, mi"), + Q!(" cmp " "x12, xzr"), + Q!(" csetm " "x16, mi"), + Q!(" cneg " "x12, x12, mi"), + Q!(" cmp " "x13, xzr"), + Q!(" csetm " "x17, mi"), + Q!(" cneg " "x13, x13, mi"), + Q!(" and " "x0, x10, x14"), + Q!(" and " "x1, x11, x15"), + Q!(" add " "x9, x0, x1"), + Q!(" and " "x0, x12, x16"), + Q!(" and " "x1, x13, x17"), + Q!(" add " "x19, x0, x1"), + Q!(" ldr " "x7, [sp]"), + Q!(" eor " "x1, x7, x14"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x4, x9, x0"), + Q!(" adc " "x2, xzr, x1"), + Q!(" ldr " "x8, [sp, #32]"), + Q!(" eor " "x1, x8, x15"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x4, x4, x0"), + Q!(" adc " "x2, x2, x1"), + Q!(" eor " "x1, x7, x16"), + Q!(" mul " "x0, x1, x12"), + Q!(" umulh " "x1, x1, x12"), + Q!(" adds " "x5, x19, x0"), + Q!(" adc " "x3, xzr, x1"), + Q!(" eor " "x1, x8, x17"), + Q!(" mul " "x0, x1, x13"), + Q!(" umulh " "x1, x1, x13"), + Q!(" adds " "x5, x5, x0"), + Q!(" adc " "x3, x3, x1"), + Q!(" ldr " "x7, [sp, #8]"), + Q!(" eor " "x1, x7, x14"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x2, x2, x0"), + Q!(" adc " "x6, xzr, x1"), + Q!(" ldr " "x8, [sp, #40]"), + Q!(" eor " "x1, x8, x15"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x2, x2, x0"), + Q!(" adc " "x6, x6, x1"), + Q!(" extr " "x4, x2, x4, #59"), + Q!(" str " "x4, [sp]"), + Q!(" eor " "x1, x7, x16"), + Q!(" mul " "x0, x1, x12"), + Q!(" 
umulh " "x1, x1, x12"), + Q!(" adds " "x3, x3, x0"), + Q!(" adc " "x4, xzr, x1"), + Q!(" eor " "x1, x8, x17"), + Q!(" mul " "x0, x1, x13"), + Q!(" umulh " "x1, x1, x13"), + Q!(" adds " "x3, x3, x0"), + Q!(" adc " "x4, x4, x1"), + Q!(" extr " "x5, x3, x5, #59"), + Q!(" str " "x5, [sp, #32]"), + Q!(" ldr " "x7, [sp, #16]"), + Q!(" eor " "x1, x7, x14"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x6, x6, x0"), + Q!(" adc " "x5, xzr, x1"), + Q!(" ldr " "x8, [sp, #48]"), + Q!(" eor " "x1, x8, x15"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x6, x6, x0"), + Q!(" adc " "x5, x5, x1"), + Q!(" extr " "x2, x6, x2, #59"), + Q!(" str " "x2, [sp, #8]"), + Q!(" eor " "x1, x7, x16"), + Q!(" mul " "x0, x1, x12"), + Q!(" umulh " "x1, x1, x12"), + Q!(" adds " "x4, x4, x0"), + Q!(" adc " "x2, xzr, x1"), + Q!(" eor " "x1, x8, x17"), + Q!(" mul " "x0, x1, x13"), + Q!(" umulh " "x1, x1, x13"), + Q!(" adds " "x4, x4, x0"), + Q!(" adc " "x2, x2, x1"), + Q!(" extr " "x3, x4, x3, #59"), + Q!(" str " "x3, [sp, #40]"), + Q!(" ldr " "x7, [sp, #24]"), + Q!(" eor " "x1, x7, x14"), + Q!(" asr " "x3, x1, #63"), + Q!(" and " "x3, x3, x10"), + Q!(" neg " "x3, x3"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x5, x5, x0"), + Q!(" adc " "x3, x3, x1"), + Q!(" ldr " "x8, [sp, #56]"), + Q!(" eor " "x1, x8, x15"), + Q!(" asr " "x0, x1, #63"), + Q!(" and " "x0, x0, x11"), + Q!(" sub " "x3, x3, x0"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x5, x5, x0"), + Q!(" adc " "x3, x3, x1"), + Q!(" extr " "x6, x5, x6, #59"), + Q!(" str " "x6, [sp, #16]"), + Q!(" extr " "x5, x3, x5, #59"), + Q!(" str " "x5, [sp, #24]"), + Q!(" eor " "x1, x7, x16"), + Q!(" asr " "x5, x1, #63"), + Q!(" and " "x5, x5, x12"), + Q!(" neg " "x5, x5"), + Q!(" mul " "x0, x1, x12"), + Q!(" umulh " "x1, x1, x12"), + Q!(" adds " "x2, x2, x0"), + Q!(" adc " "x5, x5, x1"), + Q!(" eor " "x1, x8, x17"), + Q!(" asr " "x0, 
x1, #63"), + Q!(" and " "x0, x0, x13"), + Q!(" sub " "x5, x5, x0"), + Q!(" mul " "x0, x1, x13"), + Q!(" umulh " "x1, x1, x13"), + Q!(" adds " "x2, x2, x0"), + Q!(" adc " "x5, x5, x1"), + Q!(" extr " "x4, x2, x4, #59"), + Q!(" str " "x4, [sp, #48]"), + Q!(" extr " "x2, x5, x2, #59"), + Q!(" str " "x2, [sp, #56]"), + Q!(" ldr " "x7, [sp, #64]"), + Q!(" eor " "x1, x7, x14"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x4, x9, x0"), + Q!(" adc " "x2, xzr, x1"), + Q!(" ldr " "x8, [sp, #96]"), + Q!(" eor " "x1, x8, x15"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x4, x4, x0"), + Q!(" str " "x4, [sp, #64]"), + Q!(" adc " "x2, x2, x1"), + Q!(" eor " "x1, x7, x16"), + Q!(" mul " "x0, x1, x12"), + Q!(" umulh " "x1, x1, x12"), + Q!(" adds " "x5, x19, x0"), + Q!(" adc " "x3, xzr, x1"), + Q!(" eor " "x1, x8, x17"), + Q!(" mul " "x0, x1, x13"), + Q!(" umulh " "x1, x1, x13"), + Q!(" adds " "x5, x5, x0"), + Q!(" str " "x5, [sp, #96]"), + Q!(" adc " "x3, x3, x1"), + Q!(" ldr " "x7, [sp, #72]"), + Q!(" eor " "x1, x7, x14"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x2, x2, x0"), + Q!(" adc " "x6, xzr, x1"), + Q!(" ldr " "x8, [sp, #104]"), + Q!(" eor " "x1, x8, x15"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x2, x2, x0"), + Q!(" str " "x2, [sp, #72]"), + Q!(" adc " "x6, x6, x1"), + Q!(" eor " "x1, x7, x16"), + Q!(" mul " "x0, x1, x12"), + Q!(" umulh " "x1, x1, x12"), + Q!(" adds " "x3, x3, x0"), + Q!(" adc " "x4, xzr, x1"), + Q!(" eor " "x1, x8, x17"), + Q!(" mul " "x0, x1, x13"), + Q!(" umulh " "x1, x1, x13"), + Q!(" adds " "x3, x3, x0"), + Q!(" str " "x3, [sp, #104]"), + Q!(" adc " "x4, x4, x1"), + Q!(" ldr " "x7, [sp, #80]"), + Q!(" eor " "x1, x7, x14"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x6, x6, x0"), + Q!(" adc " "x5, xzr, x1"), + Q!(" ldr " "x8, [sp, #112]"), + Q!(" eor " "x1, x8, x15"), + Q!(" mul " "x0, x1, 
x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x6, x6, x0"), + Q!(" str " "x6, [sp, #80]"), + Q!(" adc " "x5, x5, x1"), + Q!(" eor " "x1, x7, x16"), + Q!(" mul " "x0, x1, x12"), + Q!(" umulh " "x1, x1, x12"), + Q!(" adds " "x4, x4, x0"), + Q!(" adc " "x2, xzr, x1"), + Q!(" eor " "x1, x8, x17"), + Q!(" mul " "x0, x1, x13"), + Q!(" umulh " "x1, x1, x13"), + Q!(" adds " "x4, x4, x0"), + Q!(" str " "x4, [sp, #112]"), + Q!(" adc " "x2, x2, x1"), + Q!(" ldr " "x7, [sp, #88]"), + Q!(" eor " "x1, x7, x14"), + Q!(" and " "x3, x14, x10"), + Q!(" neg " "x3, x3"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x5, x5, x0"), + Q!(" adc " "x3, x3, x1"), + Q!(" ldr " "x8, [sp, #120]"), + Q!(" eor " "x1, x8, x15"), + Q!(" and " "x0, x15, x11"), + Q!(" sub " "x3, x3, x0"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x5, x5, x0"), + Q!(" adc " "x3, x3, x1"), + Q!(" extr " "x6, x3, x5, #63"), + Q!(" ldp " "x0, x1, [sp, #64]"), + Q!(" add " "x6, x6, x3, asr #63"), + Q!(" mov " "x3, #0x13"), + Q!(" mul " "x4, x6, x3"), + Q!(" add " "x5, x5, x6, lsl #63"), + Q!(" smulh " "x3, x6, x3"), + Q!(" ldr " "x6, [sp, #80]"), + Q!(" adds " "x0, x0, x4"), + Q!(" adcs " "x1, x1, x3"), + Q!(" asr " "x3, x3, #63"), + Q!(" adcs " "x6, x6, x3"), + Q!(" adc " "x5, x5, x3"), + Q!(" stp " "x0, x1, [sp, #64]"), + Q!(" stp " "x6, x5, [sp, #80]"), + Q!(" eor " "x1, x7, x16"), + Q!(" and " "x5, x16, x12"), + Q!(" neg " "x5, x5"), + Q!(" mul " "x0, x1, x12"), + Q!(" umulh " "x1, x1, x12"), + Q!(" adds " "x2, x2, x0"), + Q!(" adc " "x5, x5, x1"), + Q!(" eor " "x1, x8, x17"), + Q!(" and " "x0, x17, x13"), + Q!(" sub " "x5, x5, x0"), + Q!(" mul " "x0, x1, x13"), + Q!(" umulh " "x1, x1, x13"), + Q!(" adds " "x2, x2, x0"), + Q!(" adc " "x5, x5, x1"), + Q!(" extr " "x6, x5, x2, #63"), + Q!(" ldp " "x0, x1, [sp, #96]"), + Q!(" add " "x6, x6, x5, asr #63"), + Q!(" mov " "x5, #0x13"), + Q!(" mul " "x4, x6, x5"), + Q!(" add " "x2, x2, x6, lsl #63"), + Q!(" 
smulh " "x5, x6, x5"), + Q!(" ldr " "x3, [sp, #112]"), + Q!(" adds " "x0, x0, x4"), + Q!(" adcs " "x1, x1, x5"), + Q!(" asr " "x5, x5, #63"), + Q!(" adcs " "x3, x3, x5"), + Q!(" adc " "x2, x2, x5"), + Q!(" stp " "x0, x1, [sp, #96]"), + Q!(" stp " "x3, x2, [sp, #112]"), + Q!(Label!("edwards25519_scalarmuldouble_alt_invmidloop", 7) ":"), + Q!(" mov " "x1, x22"), + Q!(" ldr " "x2, [sp]"), + Q!(" ldr " "x3, [sp, #32]"), + Q!(" and " "x4, x2, #0xfffff"), + Q!(" orr " "x4, x4, #0xfffffe0000000000"), + Q!(" and " "x5, x3, #0xfffff"), + Q!(" orr " "x5, x5, #0xc000000000000000"), + Q!(" tst " "x5, #0x1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, 
ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " 
"x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " 
"x1, x1, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" add " "x8, x4, #0x100, lsl #12"), + Q!(" sbfx " "x8, x8, #21, #21"), + Q!(" mov " "x11, #0x100000"), + Q!(" add " "x11, x11, x11, lsl #21"), + Q!(" add " "x9, x4, x11"), + Q!(" asr " "x9, x9, #42"), + Q!(" add " "x10, x5, #0x100, lsl #12"), + Q!(" sbfx " "x10, x10, #21, #21"), + Q!(" add " "x11, x5, x11"), + Q!(" asr " "x11, x11, #42"), + Q!(" mul " "x6, x8, x2"), + Q!(" mul " "x7, x9, x3"), + Q!(" mul " "x2, x10, x2"), + Q!(" mul " "x3, x11, x3"), + Q!(" add " "x4, x6, x7"), + Q!(" add " "x5, x2, x3"), + Q!(" asr " "x2, x4, #20"), + Q!(" asr " "x3, x5, #20"), + Q!(" and " "x4, x2, #0xfffff"), + Q!(" orr " "x4, x4, #0xfffffe0000000000"), + Q!(" and " "x5, x3, #0xfffff"), + Q!(" orr " "x5, x5, #0xc000000000000000"), + Q!(" tst " "x5, #0x1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + 
Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), 
+ Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + 
Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" add " "x12, x4, #0x100, lsl #12"), + Q!(" sbfx " "x12, x12, #21, #21"), + Q!(" mov " "x15, #0x100000"), + Q!(" add " "x15, x15, x15, lsl #21"), + Q!(" add " "x13, x4, x15"), + Q!(" asr " "x13, x13, #42"), + Q!(" add " "x14, x5, #0x100, lsl #12"), + Q!(" sbfx " "x14, x14, #21, #21"), + Q!(" add " "x15, x5, x15"), + Q!(" asr " "x15, x15, #42"), + Q!(" mul " "x6, x12, x2"), + Q!(" mul " "x7, x13, x3"), + Q!(" mul " "x2, x14, x2"), + Q!(" mul " "x3, x15, x3"), + Q!(" add " "x4, x6, x7"), + Q!(" add " "x5, x2, x3"), + Q!(" asr " "x2, x4, #20"), + Q!(" asr " "x3, x5, #20"), + Q!(" and " "x4, x2, #0xfffff"), + Q!(" orr " "x4, x4, #0xfffffe0000000000"), + Q!(" and " "x5, x3, #0xfffff"), + Q!(" orr " "x5, x5, #0xc000000000000000"), + Q!(" tst " "x5, #0x1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" 
add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" mul " "x2, x12, x8"), + Q!(" mul " "x3, x12, x9"), + Q!(" mul " "x6, x14, x8"), + Q!(" mul " "x7, x14, x9"), + Q!(" madd " "x8, x13, x10, x2"), + Q!(" madd " "x9, x13, x11, x3"), + Q!(" madd " "x16, x15, x10, x6"), + Q!(" madd " "x17, x15, x11, 
x7"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg 
" "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" tst " "x5, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" csel " "x6, x4, xzr, ne"), + Q!(" ccmp " "x1, xzr, #0x8, ne"), + Q!(" cneg " "x1, x1, ge"), + Q!(" cneg " "x6, x6, ge"), + Q!(" csel " "x4, x5, x4, ge"), + Q!(" add " "x5, x5, x6"), + Q!(" add " "x1, x1, #0x2"), + Q!(" asr " "x5, x5, #1"), + Q!(" add " "x12, x4, #0x100, lsl #12"), + Q!(" sbfx " "x12, x12, #22, #21"), + Q!(" mov " "x15, #0x100000"), + Q!(" add " "x15, x15, x15, lsl #21"), + Q!(" add " "x13, x4, x15"), + Q!(" asr " "x13, x13, #43"), + Q!(" add " "x14, x5, #0x100, lsl #12"), + Q!(" sbfx " "x14, x14, #22, #21"), + Q!(" add " "x15, x5, x15"), + Q!(" asr " "x15, x15, #43"), + Q!(" mneg " "x2, x12, x8"), + Q!(" mneg " "x3, x12, x9"), + Q!(" mneg " "x4, x14, x8"), + Q!(" mneg " "x5, x14, x9"), + Q!(" msub " "x10, x13, x16, x2"), + Q!(" msub " "x11, x13, x17, x3"), + Q!(" msub " "x12, x15, x16, x4"), + Q!(" msub " "x13, x15, x17, x5"), + Q!(" mov " "x22, x1"), + Q!(" subs " "x21, x21, #0x1"), + Q!(" b.ne " Label!("edwards25519_scalarmuldouble_alt_invloop", 8, Before)), + Q!(" ldr " "x0, [sp]"), + Q!(" ldr " "x1, [sp, #32]"), + Q!(" mul " "x0, x0, x10"), + Q!(" madd " "x1, x1, x11, x0"), + Q!(" asr " "x0, x1, #63"), + Q!(" cmp " "x10, xzr"), + Q!(" csetm " "x14, mi"), + Q!(" cneg " "x10, x10, mi"), + Q!(" eor " "x14, x14, x0"), + Q!(" cmp " "x11, xzr"), + Q!(" csetm " "x15, mi"), + Q!(" cneg " "x11, x11, mi"), + Q!(" eor " "x15, x15, x0"), + Q!(" cmp " "x12, xzr"), + Q!(" csetm " "x16, mi"), + Q!(" cneg " "x12, x12, mi"), + Q!(" eor " "x16, x16, x0"), + Q!(" cmp " "x13, xzr"), + Q!(" csetm " "x17, mi"), + Q!(" cneg " "x13, x13, mi"), + Q!(" eor " "x17, x17, x0"), + Q!(" and " "x0, x10, x14"), + Q!(" and " "x1, x11, x15"), + Q!(" add " "x9, x0, x1"), + Q!(" ldr " "x7, [sp, #64]"), + Q!(" eor " "x1, x7, x14"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, 
x10"), + Q!(" adds " "x4, x9, x0"), + Q!(" adc " "x2, xzr, x1"), + Q!(" ldr " "x8, [sp, #96]"), + Q!(" eor " "x1, x8, x15"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x4, x4, x0"), + Q!(" str " "x4, [sp, #64]"), + Q!(" adc " "x2, x2, x1"), + Q!(" ldr " "x7, [sp, #72]"), + Q!(" eor " "x1, x7, x14"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x2, x2, x0"), + Q!(" adc " "x6, xzr, x1"), + Q!(" ldr " "x8, [sp, #104]"), + Q!(" eor " "x1, x8, x15"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x2, x2, x0"), + Q!(" str " "x2, [sp, #72]"), + Q!(" adc " "x6, x6, x1"), + Q!(" ldr " "x7, [sp, #80]"), + Q!(" eor " "x1, x7, x14"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x6, x6, x0"), + Q!(" adc " "x5, xzr, x1"), + Q!(" ldr " "x8, [sp, #112]"), + Q!(" eor " "x1, x8, x15"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x6, x6, x0"), + Q!(" str " "x6, [sp, #80]"), + Q!(" adc " "x5, x5, x1"), + Q!(" ldr " "x7, [sp, #88]"), + Q!(" eor " "x1, x7, x14"), + Q!(" and " "x3, x14, x10"), + Q!(" neg " "x3, x3"), + Q!(" mul " "x0, x1, x10"), + Q!(" umulh " "x1, x1, x10"), + Q!(" adds " "x5, x5, x0"), + Q!(" adc " "x3, x3, x1"), + Q!(" ldr " "x8, [sp, #120]"), + Q!(" eor " "x1, x8, x15"), + Q!(" and " "x0, x15, x11"), + Q!(" sub " "x3, x3, x0"), + Q!(" mul " "x0, x1, x11"), + Q!(" umulh " "x1, x1, x11"), + Q!(" adds " "x5, x5, x0"), + Q!(" adc " "x3, x3, x1"), + Q!(" extr " "x6, x3, x5, #63"), + Q!(" ldp " "x0, x1, [sp, #64]"), + Q!(" tst " "x3, x3"), + Q!(" cinc " "x6, x6, pl"), + Q!(" mov " "x3, #0x13"), + Q!(" mul " "x4, x6, x3"), + Q!(" add " "x5, x5, x6, lsl #63"), + Q!(" smulh " "x6, x6, x3"), + Q!(" ldr " "x2, [sp, #80]"), + Q!(" adds " "x0, x0, x4"), + Q!(" adcs " "x1, x1, x6"), + Q!(" asr " "x6, x6, #63"), + Q!(" adcs " "x2, x2, x6"), + Q!(" adcs " "x5, x5, x6"), + Q!(" csel " "x3, x3, xzr, mi"), + Q!(" subs " "x0, x0, x3"), + 
Q!(" sbcs " "x1, x1, xzr"), + Q!(" sbcs " "x2, x2, xzr"), + Q!(" sbc " "x5, x5, xzr"), + Q!(" and " "x5, x5, #0x7fffffffffffffff"), + Q!(" mov " "x4, x20"), + Q!(" stp " "x0, x1, [x4]"), + Q!(" stp " "x2, x5, [x4, #16]"), + + // Store result. Note that these are the only reductions mod 2^255-19 + + Q!(" mov " p0!() ", " res!()), + Q!(" add " p1!() ", " acc!()), + Q!(" add " p2!() ", " tabent!()), + mul_p25519!(x_0!(), x_1!(), x_2!()), + + Q!(" add " p0!() ", " res!() ", #32"), + Q!(" add " p1!() ", " acc!() "+ 32"), + Q!(" add " p2!() ", " tabent!()), + mul_p25519!(x_0!(), x_1!(), x_2!()), + + // Restore stack and registers + + Q!(" add " "sp, sp, # " NSPACE!()), + Q!(" ldp " "x25, x30, [sp], 16"), + Q!(" ldp " "x23, x24, [sp], 16"), + Q!(" ldp " "x21, x22, [sp], 16"), + Q!(" ldp " "x19, x20, [sp], 16"), + + // proc hoisting in -> ret after edwards25519_scalarmuldouble_alt_pepadd + Q!(" b " Label!("hoist_finish", 9, After)), + + // **************************************************************************** + // Localized versions of subroutines. + // These are close to the standalone functions "edwards25519_epdouble" etc., + // but are only maintaining reduction modulo 2^256 - 38, not 2^255 - 19. 
+ // **************************************************************************** + + Q!(Label!("edwards25519_scalarmuldouble_alt_epdouble", 2) ":"), + Q!(" sub " "sp, sp, # (5 * " NUMSIZE!() ")"), + add_twice4!(t0!(), x_1!(), y_1!()), + sqr_4!(t1!(), z_1!()), + sqr_4!(t2!(), x_1!()), + sqr_4!(t3!(), y_1!()), + double_twice4!(t1!(), t1!()), + sqr_4!(t0!(), t0!()), + add_twice4!(t4!(), t2!(), t3!()), + sub_twice4!(t2!(), t2!(), t3!()), + add_twice4!(t3!(), t1!(), t2!()), + sub_twice4!(t1!(), t4!(), t0!()), + mul_4!(y_0!(), t2!(), t4!()), + mul_4!(z_0!(), t3!(), t2!()), + mul_4!(w_0!(), t1!(), t4!()), + mul_4!(x_0!(), t1!(), t3!()), + Q!(" add " "sp, sp, # (5 * " NUMSIZE!() ")"), + Q!(" ret " ), + + Q!(Label!("edwards25519_scalarmuldouble_alt_pdouble", 6) ":"), + Q!(" sub " "sp, sp, # (5 * " NUMSIZE!() ")"), + add_twice4!(t0!(), x_1!(), y_1!()), + sqr_4!(t1!(), z_1!()), + sqr_4!(t2!(), x_1!()), + sqr_4!(t3!(), y_1!()), + double_twice4!(t1!(), t1!()), + sqr_4!(t0!(), t0!()), + add_twice4!(t4!(), t2!(), t3!()), + sub_twice4!(t2!(), t2!(), t3!()), + add_twice4!(t3!(), t1!(), t2!()), + sub_twice4!(t1!(), t4!(), t0!()), + mul_4!(y_0!(), t2!(), t4!()), + mul_4!(z_0!(), t3!(), t2!()), + mul_4!(x_0!(), t1!(), t3!()), + Q!(" add " "sp, sp, # (5 * " NUMSIZE!() ")"), + Q!(" ret " ), + + Q!(Label!("edwards25519_scalarmuldouble_alt_epadd", 3) ":"), + Q!(" sub " "sp, sp, # (6 * " NUMSIZE!() ")"), + mul_4!(t0!(), w_1!(), w_2!()), + sub_twice4!(t1!(), y_1!(), x_1!()), + sub_twice4!(t2!(), y_2!(), x_2!()), + add_twice4!(t3!(), y_1!(), x_1!()), + add_twice4!(t4!(), y_2!(), x_2!()), + double_twice4!(t5!(), z_2!()), + mul_4!(t1!(), t1!(), t2!()), + mul_4!(t3!(), t3!(), t4!()), + load_k25519!(t2!()), + mul_4!(t2!(), t2!(), t0!()), + mul_4!(t4!(), z_1!(), t5!()), + sub_twice4!(t0!(), t3!(), t1!()), + add_twice4!(t5!(), t3!(), t1!()), + sub_twice4!(t1!(), t4!(), t2!()), + add_twice4!(t3!(), t4!(), t2!()), + mul_4!(w_0!(), t0!(), t5!()), + mul_4!(x_0!(), t0!(), t1!()), + mul_4!(y_0!(), 
t3!(), t5!()), + mul_4!(z_0!(), t1!(), t3!()), + Q!(" add " "sp, sp, # (6 * " NUMSIZE!() ")"), + Q!(" ret " ), + + Q!(Label!("edwards25519_scalarmuldouble_alt_pepadd", 4) ":"), + Q!(" sub " "sp, sp, # (6 * " NUMSIZE!() ")"), + double_twice4!(t0!(), z_1!()), + sub_twice4!(t1!(), y_1!(), x_1!()), + add_twice4!(t2!(), y_1!(), x_1!()), + mul_4!(t3!(), w_1!(), z_2!()), + mul_4!(t1!(), t1!(), x_2!()), + mul_4!(t2!(), t2!(), y_2!()), + sub_twice4!(t4!(), t0!(), t3!()), + add_twice4!(t0!(), t0!(), t3!()), + sub_twice4!(t5!(), t2!(), t1!()), + add_twice4!(t1!(), t2!(), t1!()), + mul_4!(z_0!(), t4!(), t0!()), + mul_4!(x_0!(), t5!(), t4!()), + mul_4!(y_0!(), t0!(), t1!()), + mul_4!(w_0!(), t5!(), t1!()), + Q!(" add " "sp, sp, # (6 * " NUMSIZE!() ")"), + Q!(" ret " ), + Q!(Label!("hoist_finish", 9) ":"), + inout("x0") res.as_mut_ptr() => _, + inout("x1") scalar.as_ptr() => _, + inout("x2") point.as_ptr() => _, + inout("x3") bscalar.as_ptr() => _, + edwards25519_scalarmuldouble_alt_table = sym edwards25519_scalarmuldouble_alt_table, + // clobbers + out("x10") _, + out("x11") _, + out("x12") _, + out("x13") _, + out("x14") _, + out("x15") _, + out("x16") _, + out("x17") _, + out("x20") _, + out("x21") _, + out("x22") _, + out("x23") _, + out("x24") _, + out("x25") _, + out("x30") _, + out("x4") _, + out("x5") _, + out("x6") _, + out("x7") _, + out("x8") _, + out("x9") _, + ) + }; +} + +// **************************************************************************** +// The precomputed data (all read-only). This is currently part of the same +// text section, which gives position-independent code with simple PC-relative +// addressing. However it could be put in a separate section via something like +// +// .section .rodata +// **************************************************************************** + +// Precomputed table of multiples of generator for edwards25519 +// all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. 
+ +#[allow(dead_code)] +#[repr(align(4096))] +struct PageAlignedu64Array96([u64; 96]); + +static edwards25519_scalarmuldouble_alt_table: PageAlignedu64Array96 = PageAlignedu64Array96([ + // 1 * G + 0x9d103905d740913e, + 0xfd399f05d140beb3, + 0xa5c18434688f8a09, + 0x44fd2f9298f81267, + 0x2fbc93c6f58c3b85, + 0xcf932dc6fb8c0e19, + 0x270b4898643d42c2, + 0x07cf9d3a33d4ba65, + 0xabc91205877aaa68, + 0x26d9e823ccaac49e, + 0x5a1b7dcbdd43598c, + 0x6f117b689f0c65a8, + // 2 * G + 0x8a99a56042b4d5a8, + 0x8f2b810c4e60acf6, + 0xe09e236bb16e37aa, + 0x6bb595a669c92555, + 0x9224e7fc933c71d7, + 0x9f469d967a0ff5b5, + 0x5aa69a65e1d60702, + 0x590c063fa87d2e2e, + 0x43faa8b3a59b7a5f, + 0x36c16bdd5d9acf78, + 0x500fa0840b3d6a31, + 0x701af5b13ea50b73, + // 3 * G + 0x56611fe8a4fcd265, + 0x3bd353fde5c1ba7d, + 0x8131f31a214bd6bd, + 0x2ab91587555bda62, + 0xaf25b0a84cee9730, + 0x025a8430e8864b8a, + 0xc11b50029f016732, + 0x7a164e1b9a80f8f4, + 0x14ae933f0dd0d889, + 0x589423221c35da62, + 0xd170e5458cf2db4c, + 0x5a2826af12b9b4c6, + // 4 * G + 0x95fe050a056818bf, + 0x327e89715660faa9, + 0xc3e8e3cd06a05073, + 0x27933f4c7445a49a, + 0x287351b98efc099f, + 0x6765c6f47dfd2538, + 0xca348d3dfb0a9265, + 0x680e910321e58727, + 0x5a13fbe9c476ff09, + 0x6e9e39457b5cc172, + 0x5ddbdcf9102b4494, + 0x7f9d0cbf63553e2b, + // 5 * G + 0x7f9182c3a447d6ba, + 0xd50014d14b2729b7, + 0xe33cf11cb864a087, + 0x154a7e73eb1b55f3, + 0xa212bc4408a5bb33, + 0x8d5048c3c75eed02, + 0xdd1beb0c5abfec44, + 0x2945ccf146e206eb, + 0xbcbbdbf1812a8285, + 0x270e0807d0bdd1fc, + 0xb41b670b1bbda72d, + 0x43aabe696b3bb69a, + // 6 * G + 0x499806b67b7d8ca4, + 0x575be28427d22739, + 0xbb085ce7204553b9, + 0x38b64c41ae417884, + 0x3a0ceeeb77157131, + 0x9b27158900c8af88, + 0x8065b668da59a736, + 0x51e57bb6a2cc38bd, + 0x85ac326702ea4b71, + 0xbe70e00341a1bb01, + 0x53e4a24b083bc144, + 0x10b8e91a9f0d61e3, + // 7 * G + 0xba6f2c9aaa3221b1, + 0x6ca021533bba23a7, + 0x9dea764f92192c3a, + 0x1d6edd5d2e5317e0, + 0x6b1a5cd0944ea3bf, + 0x7470353ab39dc0d2, + 0x71b2528228542e49, 
+ 0x461bea69283c927e, + 0xf1836dc801b8b3a2, + 0xb3035f47053ea49a, + 0x529c41ba5877adf3, + 0x7a9fbb1c6a0f90a7, + // 8 * G + 0xe2a75dedf39234d9, + 0x963d7680e1b558f9, + 0x2c2741ac6e3c23fb, + 0x3a9024a1320e01c3, + 0x59b7596604dd3e8f, + 0x6cb30377e288702c, + 0xb1339c665ed9c323, + 0x0915e76061bce52f, + 0xe7c1f5d9c9a2911a, + 0xb8a371788bcca7d7, + 0x636412190eb62a32, + 0x26907c5c2ecc4e95, +]); diff --git a/graviola/src/low/aarch64/mod.rs b/graviola/src/low/aarch64/mod.rs index 43c3d077d..37fbeed68 100644 --- a/graviola/src/low/aarch64/mod.rs +++ b/graviola/src/low/aarch64/mod.rs @@ -27,6 +27,10 @@ pub(crate) mod bignum_kmul_16_32; pub(crate) mod bignum_kmul_32_64; pub(crate) mod bignum_ksqr_16_32; pub(crate) mod bignum_ksqr_32_64; +#[allow(dead_code)] // TODO(phlip9): remove +pub(crate) mod bignum_madd_n25519; +#[allow(dead_code)] // TODO(phlip9): remove +pub(crate) mod bignum_mod_n25519; pub(crate) mod bignum_mod_n256; pub(crate) mod bignum_mod_n384; pub(crate) mod bignum_modadd; @@ -42,6 +46,8 @@ pub(crate) mod bignum_montsqr_p256; pub(crate) mod bignum_montsqr_p384; pub(crate) mod bignum_mul; pub(crate) mod bignum_mux; +#[allow(dead_code)] // TODO(phlip9): remove +pub(crate) mod bignum_neg_p25519; pub(crate) mod bignum_neg_p256; pub(crate) mod bignum_neg_p384; pub(crate) mod bignum_negmodinv; @@ -54,6 +60,12 @@ pub(crate) mod bignum_tomont_p384; pub(crate) mod cpu; pub(crate) mod curve25519_x25519; pub(crate) mod curve25519_x25519base; +#[allow(dead_code)] // TODO(phlip9): remove +pub(crate) mod edwards25519_decode; +#[allow(dead_code)] // TODO(phlip9): remove +pub(crate) mod edwards25519_scalarmulbase; +#[allow(dead_code)] // TODO(phlip9): remove +pub(crate) mod edwards25519_scalarmuldouble; pub(crate) mod ghash; pub(crate) mod p256_montjadd; pub(crate) mod p256_montjdouble; diff --git a/graviola/src/low/mod.rs b/graviola/src/low/mod.rs index 5e6fd9c74..109f5e98f 100644 --- a/graviola/src/low/mod.rs +++ b/graviola/src/low/mod.rs @@ -66,6 +66,10 @@ cfg_if::cfg_if! 
{ pub(crate) use x86_64::bignum_kmul_32_64::bignum_kmul_32_64; pub(crate) use x86_64::bignum_ksqr_16_32::bignum_ksqr_16_32; pub(crate) use x86_64::bignum_ksqr_32_64::bignum_ksqr_32_64; + #[allow(unused_imports)] // TODO(phlip9): remove + pub(crate) use x86_64::bignum_madd_n25519::bignum_madd_n25519; + #[allow(unused_imports)] // TODO(phlip9): remove + pub(crate) use x86_64::bignum_mod_n25519::bignum_mod_n25519; pub(crate) use x86_64::bignum_mod_n256::bignum_mod_n256; pub(crate) use x86_64::bignum_mod_n384::bignum_mod_n384; pub(crate) use x86_64::bignum_modadd::bignum_modadd; @@ -81,6 +85,8 @@ cfg_if::cfg_if! { pub(crate) use x86_64::bignum_montsqr_p384::bignum_montsqr_p384; pub(crate) use x86_64::bignum_mul::bignum_mul; pub(crate) use x86_64::bignum_mux::bignum_mux; + #[allow(unused_imports)] // TODO(phlip9): remove + pub(crate) use x86_64::bignum_neg_p25519::bignum_neg_p25519; pub(crate) use x86_64::bignum_neg_p256::bignum_neg_p256; pub(crate) use x86_64::bignum_neg_p384::bignum_neg_p384; pub(crate) use x86_64::bignum_negmodinv::bignum_negmodinv; @@ -90,6 +96,12 @@ cfg_if::cfg_if! { pub(crate) use x86_64::bignum_tomont_p384::bignum_tomont_p384; pub(crate) use x86_64::curve25519_x25519::curve25519_x25519; pub(crate) use x86_64::curve25519_x25519base::curve25519_x25519base; + #[allow(unused_imports)] // TODO(phlip9): remove + pub(crate) use x86_64::edwards25519_decode::edwards25519_decode; + #[allow(unused_imports)] // TODO(phlip9): remove + pub(crate) use x86_64::edwards25519_scalarmulbase::edwards25519_scalarmulbase; + #[allow(unused_imports)] // TODO(phlip9): remove + pub(crate) use x86_64::edwards25519_scalarmuldouble::edwards25519_scalarmuldouble; pub(crate) use x86_64::ghash; pub(crate) use x86_64::p256_montjadd::p256_montjadd; pub(crate) use x86_64::p256_montjdouble::p256_montjdouble; @@ -126,6 +138,10 @@ cfg_if::cfg_if! 
{ pub(crate) use aarch64::bignum_kmul_32_64::bignum_kmul_32_64; pub(crate) use aarch64::bignum_ksqr_16_32::bignum_ksqr_16_32; pub(crate) use aarch64::bignum_ksqr_32_64::bignum_ksqr_32_64; + #[allow(unused_imports)] // TODO(phlip9): remove + pub(crate) use aarch64::bignum_madd_n25519::bignum_madd_n25519; + #[allow(unused_imports)] // TODO(phlip9): remove + pub(crate) use aarch64::bignum_mod_n25519::bignum_mod_n25519; pub(crate) use aarch64::bignum_mod_n256::bignum_mod_n256; pub(crate) use aarch64::bignum_mod_n384::bignum_mod_n384; pub(crate) use aarch64::bignum_modadd::bignum_modadd; @@ -141,6 +157,8 @@ cfg_if::cfg_if! { pub(crate) use aarch64::bignum_montsqr_p384::bignum_montsqr_p384; pub(crate) use aarch64::bignum_mul::bignum_mul; pub(crate) use aarch64::bignum_mux::bignum_mux; + #[allow(unused_imports)] // TODO(phlip9): remove + pub(crate) use aarch64::bignum_neg_p25519::bignum_neg_p25519; pub(crate) use aarch64::bignum_neg_p256::bignum_neg_p256; pub(crate) use aarch64::bignum_neg_p384::bignum_neg_p384; pub(crate) use aarch64::bignum_negmodinv::bignum_negmodinv; @@ -150,6 +168,12 @@ cfg_if::cfg_if! 
{ pub(crate) use aarch64::bignum_tomont_p384::bignum_tomont_p384; pub(crate) use aarch64::curve25519_x25519::curve25519_x25519; pub(crate) use aarch64::curve25519_x25519base::curve25519_x25519base; + #[allow(unused_imports)] // TODO(phlip9): remove + pub(crate) use aarch64::edwards25519_decode::edwards25519_decode; + #[allow(unused_imports)] // TODO(phlip9): remove + pub(crate) use aarch64::edwards25519_scalarmulbase::edwards25519_scalarmulbase; + #[allow(unused_imports)] // TODO(phlip9): remove + pub(crate) use aarch64::edwards25519_scalarmuldouble::edwards25519_scalarmuldouble; pub(crate) use aarch64::ghash; pub(crate) use aarch64::p256_montjadd::p256_montjadd; pub(crate) use aarch64::p256_montjdouble::p256_montjdouble; diff --git a/graviola/src/low/tests.rs b/graviola/src/low/tests.rs index b1903fa2b..f506e2fc1 100644 --- a/graviola/src/low/tests.rs +++ b/graviola/src/low/tests.rs @@ -58,3 +58,18 @@ mod model { } } } + +// TODO(phlip9): remove this after proper ed25519 infrastructure is in place +#[test] +fn test_edwards25519_decode() { + use hex::FromHex; + + // valid pubkey from RFC 8032 + let pk_hex = "d75a980182b10ab7d54bfed3c964073a0ee172f3daa62325af021a68f707511a"; + let pk = <[u8; 32]>::from_hex(pk_hex).unwrap(); + let mut point = [0u64; 8]; + assert!(super::edwards25519_decode(&mut point, &pk)); + + // invalid pubkey + assert!(!super::edwards25519_decode(&mut point, &[0xffu8; 32])); +} diff --git a/graviola/src/low/x86_64/bignum_madd_n25519.rs b/graviola/src/low/x86_64/bignum_madd_n25519.rs new file mode 100644 index 000000000..7b7d26023 --- /dev/null +++ b/graviola/src/low/x86_64/bignum_madd_n25519.rs @@ -0,0 +1,235 @@ +// generated source. do not edit. +#![allow(non_upper_case_globals, unused_macros, unused_imports)] +use crate::low::macros::*; + +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Multiply-add modulo the order of the curve25519/edwards25519 basepoint +// Inputs x[4], y[4], c[4]; output z[4] +// +// extern void bignum_madd_n25519(uint64_t z[static 4], const uint64_t x[static 4], +// const uint64_t y[static 4], +// const uint64_t c[static 4]); +// +// Performs z := (x * y + c) mod n_25519, where the modulus is +// n_25519 = 2^252 + 27742317777372353535851937790883648493, the +// order of the curve25519/edwards25519 basepoint. The result z +// and the inputs x, y and c are all 4 digits (256 bits). +// +// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y, RCX = c +// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y, R9 = c +// ---------------------------------------------------------------------------- + +// Single round of modular reduction mod_n25519, mapping +// [m4;m3;m2;m1;m0] = m to [m3;m2;m1;m0] = m mod n_25519, +// *assuming* the input m < 2^64 * n_25519. This is very +// close to the loop body of the bignum_mod_n25519 function. + +macro_rules! reduce { + ($m4:expr, $m3:expr, $m2:expr, $m1:expr, $m0:expr) => { Q!( + "mov rbx, " $m4 ";\n" + "shld rbx, " $m3 ", 0x4;\n" + "shr " $m4 ", 0x3c;\n" + "sub rbx, " $m4 ";\n" + "shl " $m3 ", 0x4;\n" + "shrd " $m3 ", " $m4 ", 0x4;\n" + "movabs rax, 0x5812631a5cf5d3ed;\n" + "mul rbx;\n" + "mov rbp, rax;\n" + "mov rcx, rdx;\n" + "movabs rax, 0x14def9dea2f79cd6;\n" + "mul rbx;\n" + "add rcx, rax;\n" + "adc rdx, 0x0;\n" + "sub " $m0 ", rbp;\n" + "sbb " $m1 ", rcx;\n" + "sbb " $m2 ", rdx;\n" + "sbb " $m3 ", 0x0;\n" + "sbb rbx, rbx;\n" + "movabs rax, 0x5812631a5cf5d3ed;\n" + "and rax, rbx;\n" + "movabs rdx, 0x14def9dea2f79cd6;\n" + "and rdx, rbx;\n" + "movabs rbx, 0x1000000000000000;\n" + "and rbx, rax;\n" + "add " $m0 ", rax;\n" + "adc " $m1 ", rdx;\n" + "adc " $m2 ", 0x0;\n" + "adc " $m3 ", rbx" + )} +} + +// Special case of "reduce" with m4 = 0. 
As well as not using m4, +// the quotient selection is slightly simpler, just floor(m/2^252) +// versus min (floor(m/2^252)) (2^63-1). + +macro_rules! reduce0 { + ($m3:expr, $m2:expr, $m1:expr, $m0:expr) => { Q!( + "mov rbx, " $m3 ";\n" + "shr rbx, 60;\n" + "shl " $m3 ", 4;\n" + "shr " $m3 ", 4;\n" + "movabs rax, 0x5812631a5cf5d3ed;\n" + "mul rbx;\n" + "mov rbp, rax;\n" + "mov rcx, rdx;\n" + "movabs rax, 0x14def9dea2f79cd6;\n" + "mul rbx;\n" + "add rcx, rax;\n" + "adc rdx, 0x0;\n" + "sub " $m0 ", rbp;\n" + "sbb " $m1 ", rcx;\n" + "sbb " $m2 ", rdx;\n" + "sbb " $m3 ", 0x0;\n" + "sbb rbx, rbx;\n" + "movabs rax, 0x5812631a5cf5d3ed;\n" + "and rax, rbx;\n" + "movabs rdx, 0x14def9dea2f79cd6;\n" + "and rdx, rbx;\n" + "movabs rbx, 0x1000000000000000;\n" + "and rbx, rax;\n" + "add " $m0 ", rax;\n" + "adc " $m1 ", rdx;\n" + "adc " $m2 ", 0x0;\n" + "adc " $m3 ", rbx" + )} +} + +/// Multiply-add modulo the order of the curve25519/edwards25519 basepoint +/// +/// Inputs x[4], y[4], c[4]; output z[4] +/// +/// Performs z := (x * y + c) mod n_25519, where the modulus is +/// n_25519 = 2^252 + 27742317777372353535851937790883648493, the +/// order of the curve25519/edwards25519 basepoint. The result z +/// and the inputs x, y and c are all 4 digits (256 bits). +pub(crate) fn bignum_madd_n25519(z: &mut [u64; 4], x: &[u64; 4], y: &[u64; 4], c: &[u64; 4]) { + // SAFETY: inline assembly. see [crate::low::inline_assembly_safety] for safety info. + unsafe { + core::arch::asm!( + + Q!(" endbr64 " ), + + + // Save some additional registers for use + + Q!(" push " "rbx"), + Q!(" push " "rbp"), + Q!(" push " "r12"), + Q!(" push " "r13"), + Q!(" push " "r14"), + Q!(" push " "r15"), + + // First compute [r15;r14;r13;r12;r11;r10;r9;r8] = x * y + c. This is + // a multiply-add variant of an ADCX/ADOX-based schoolbook multiplier, + // starting the accumulation with the c term and doing the zeroth row + // in the same uniform fashion, otherwise similar to the start of + // bignum_mul_p256k1. 
+ + Q!(" mov " "r8, [rcx]"), + Q!(" mov " "r9, [rcx + 8]"), + Q!(" mov " "r10, [rcx + 16]"), + Q!(" mov " "r11, [rcx + 24]"), + Q!(" mov " "rcx, rdx"), + Q!(" xor " "ebp, ebp"), + Q!(" mov " "rdx, [rcx]"), + Q!(" mulx " "rbx, rax, [rsi]"), + Q!(" adcx " "r8, rax"), + Q!(" adox " "r9, rbx"), + Q!(" mulx " "rbx, rax, [rsi + 0x8]"), + Q!(" adcx " "r9, rax"), + Q!(" adox " "r10, rbx"), + Q!(" mulx " "rbx, rax, [rsi + 0x10]"), + Q!(" adcx " "r10, rax"), + Q!(" adox " "r11, rbx"), + Q!(" mulx " "r12, rax, [rsi + 0x18]"), + Q!(" adcx " "r11, rax"), + Q!(" adox " "r12, rbp"), + Q!(" adcx " "r12, rbp"), + Q!(" xor " "ebp, ebp"), + Q!(" mov " "rdx, [rcx + 0x8]"), + Q!(" mulx " "rbx, rax, [rsi]"), + Q!(" adcx " "r9, rax"), + Q!(" adox " "r10, rbx"), + Q!(" mulx " "rbx, rax, [rsi + 0x8]"), + Q!(" adcx " "r10, rax"), + Q!(" adox " "r11, rbx"), + Q!(" mulx " "rbx, rax, [rsi + 0x10]"), + Q!(" adcx " "r11, rax"), + Q!(" adox " "r12, rbx"), + Q!(" mulx " "r13, rax, [rsi + 0x18]"), + Q!(" adcx " "r12, rax"), + Q!(" adox " "r13, rbp"), + Q!(" adcx " "r13, rbp"), + Q!(" xor " "ebp, ebp"), + Q!(" mov " "rdx, [rcx + 0x10]"), + Q!(" mulx " "rbx, rax, [rsi]"), + Q!(" adcx " "r10, rax"), + Q!(" adox " "r11, rbx"), + Q!(" mulx " "rbx, rax, [rsi + 0x8]"), + Q!(" adcx " "r11, rax"), + Q!(" adox " "r12, rbx"), + Q!(" mulx " "rbx, rax, [rsi + 0x10]"), + Q!(" adcx " "r12, rax"), + Q!(" adox " "r13, rbx"), + Q!(" mulx " "r14, rax, [rsi + 0x18]"), + Q!(" adcx " "r13, rax"), + Q!(" adox " "r14, rbp"), + Q!(" adcx " "r14, rbp"), + Q!(" xor " "ebp, ebp"), + Q!(" mov " "rdx, [rcx + 0x18]"), + Q!(" mulx " "rbx, rax, [rsi]"), + Q!(" adcx " "r11, rax"), + Q!(" adox " "r12, rbx"), + Q!(" mulx " "rbx, rax, [rsi + 0x8]"), + Q!(" adcx " "r12, rax"), + Q!(" adox " "r13, rbx"), + Q!(" mulx " "rbx, rax, [rsi + 0x10]"), + Q!(" adcx " "r13, rax"), + Q!(" adox " "r14, rbx"), + Q!(" mulx " "r15, rax, [rsi + 0x18]"), + Q!(" adcx " "r14, rax"), + Q!(" adox " "r15, rbp"), + Q!(" adcx " "r15, rbp"), + + // Now do the 
modular reduction and write back + + reduce0!("r15", "r14", "r13", "r12"), + reduce!("r15", "r14", "r13", "r12", "r11"), + reduce!("r14", "r13", "r12", "r11", "r10"), + reduce!("r13", "r12", "r11", "r10", "r9"), + reduce!("r12", "r11", "r10", "r9", "r8"), + + Q!(" mov " "[rdi], r8"), + Q!(" mov " "[rdi + 8], r9"), + Q!(" mov " "[rdi + 16], r10"), + Q!(" mov " "[rdi + 24], r11"), + + // Restore registers and return + + Q!(" pop " "r15"), + Q!(" pop " "r14"), + Q!(" pop " "r13"), + Q!(" pop " "r12"), + Q!(" pop " "rbp"), + Q!(" pop " "rbx"), + + inout("rdi") z.as_mut_ptr() => _, + inout("rsi") x.as_ptr() => _, + inout("rdx") y.as_ptr() => _, + inout("rcx") c.as_ptr() => _, + // clobbers + out("r10") _, + out("r11") _, + out("r12") _, + out("r13") _, + out("r14") _, + out("r15") _, + out("r8") _, + out("r9") _, + out("rax") _, + ) + }; +} diff --git a/graviola/src/low/x86_64/bignum_mod_n25519.rs b/graviola/src/low/x86_64/bignum_mod_n25519.rs new file mode 100644 index 000000000..5260bfc9c --- /dev/null +++ b/graviola/src/low/x86_64/bignum_mod_n25519.rs @@ -0,0 +1,273 @@ +// generated source. do not edit. +#![allow(non_upper_case_globals, unused_macros, unused_imports)] +use crate::low::macros::*; + +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Reduce modulo basepoint order, z := x mod n_25519 +// Input x[k]; output z[4] +// +// extern void bignum_mod_n25519(uint64_t z[static 4], uint64_t k, +// const uint64_t *x); +// +// Reduction is modulo the order of the curve25519/edwards25519 basepoint, +// which is n_25519 = 2^252 + 27742317777372353535851937790883648493 +// +// Standard x86-64 ABI: RDI = z, RSI = k, RDX = x +// Microsoft x64 ABI: RCX = z, RDX = k, R8 = x +// ---------------------------------------------------------------------------- + +macro_rules! z { + () => { + "rdi" + }; +} +macro_rules! 
k { + () => { + "rsi" + }; +} +macro_rules! x { + () => { + "rcx" + }; +} + +macro_rules! m0 { + () => { + "r8" + }; +} +macro_rules! m1 { + () => { + "r9" + }; +} +macro_rules! m2 { + () => { + "r10" + }; +} +macro_rules! m3 { + () => { + "r11" + }; +} +macro_rules! d { + () => { + "r12" + }; +} + +macro_rules! q { + () => { + "rbx" + }; +} + +/// Reduce modulo basepoint order, z := x mod n_25519 +/// +/// Input x[k]; output z[4] +/// +/// Reduction is modulo the order of the curve25519/edwards25519 basepoint, +/// which is n_25519 = 2^252 + 27742317777372353535851937790883648493 +pub(crate) fn bignum_mod_n25519(z: &mut [u64; 4], x: &[u64]) { + // SAFETY: inline assembly. see [crate::low::inline_assembly_safety] for safety info. + unsafe { + core::arch::asm!( + + Q!(" endbr64 " ), + + + // Save extra registers + + Q!(" push " "rbx"), + Q!(" push " "rbp"), + Q!(" push " "r12"), + + // If the input is already <= 3 words long, go to a trivial "copy" path + + Q!(" cmp " k!() ", 4"), + Q!(" jc " Label!("bignum_mod_n25519_shortinput", 2, After)), + + // Otherwise load the top 4 digits (top-down) and reduce k by 4 + // This [m3;m2;m1;m0] is the initial x where we begin reduction. + + Q!(" sub " k!() ", 4"), + Q!(" mov " m3!() ", [rdx + 8 * " k!() "+ 24]"), + Q!(" mov " m2!() ", [rdx + 8 * " k!() "+ 16]"), + Q!(" mov " m1!() ", [rdx + 8 * " k!() "+ 8]"), + Q!(" mov " m0!() ", [rdx + 8 * " k!() "]"), + + // Move x into another register to leave rdx free for multiplies + + Q!(" mov " x!() ", rdx"), + + // Get the quotient estimate q = floor(x/2^252). 
+ // Also delete it from m3, in effect doing x' = x - q * 2^252 + + Q!(" mov " q!() ", " m3!()), + Q!(" shr " q!() ", 60"), + + Q!(" shl " m3!() ", 4"), + Q!(" shr " m3!() ", 4"), + + // Let [rdx;d;rbp] = q * (n_25519 - 2^252) + + Q!(" mov " "rax, 0x5812631a5cf5d3ed"), + Q!(" mul " q!()), + Q!(" mov " "rbp, rax"), + Q!(" mov " d!() ", rdx"), + + Q!(" mov " "rax, 0x14def9dea2f79cd6"), + Q!(" mul " q!()), + Q!(" add " d!() ", rax"), + Q!(" adc " "rdx, 0"), + + // Subtract to get x' - q * (n_25519 - 2^252) = x - q * n_25519 + + Q!(" sub " m0!() ", rbp"), + Q!(" sbb " m1!() ", " d!()), + Q!(" sbb " m2!() ", rdx"), + Q!(" sbb " m3!() ", 0"), + + // Get a bitmask for the borrow and create a masked version of + // non-trivial digits of [rbx;0;rdx;rax] = n_25519, then add it. + // The masked n3 digit exploits the fact that bit 60 of n0 is set. + + Q!(" sbb " "rbx, rbx"), + + Q!(" mov " "rax, 0x5812631a5cf5d3ed"), + Q!(" and " "rax, rbx"), + Q!(" mov " "rdx, 0x14def9dea2f79cd6"), + Q!(" and " "rdx, rbx"), + Q!(" mov " "rbx, 0x1000000000000000"), + Q!(" and " "rbx, rax"), + + Q!(" add " m0!() ", rax"), + Q!(" adc " m1!() ", rdx"), + Q!(" adc " m2!() ", 0"), + Q!(" adc " m3!() ", rbx"), + + // Now do (k-4) iterations of 5->4 word modular reduction. Each one + // is similar to the sequence above except for the more refined quotient + // estimation process. + + Q!(" test " k!() ", " k!()), + Q!(" jz " Label!("bignum_mod_n25519_writeback", 3, After)), + + Q!(Label!("bignum_mod_n25519_loop", 4) ":"), + + // Assume that the new 5-digit x is 2^64 * previous_x + next_digit. + // Get the quotient estimate q = min (floor(x/2^252)) (2^64 - 1) + // and first compute x' = x - 2^252 * q. 
+ + Q!(" mov " q!() ", " m3!()), + Q!(" shld " q!() ", " m2!() ", 4"), + Q!(" shr " m3!() ", 60"), + Q!(" sub " q!() ", " m3!()), + Q!(" shl " m2!() ", 4"), + Q!(" shrd " m2!() ", " m3!() ", 4"), + + // Let [rdx;m3;rbp] = q * (n_25519 - 2^252) + + Q!(" mov " "rax, 0x5812631a5cf5d3ed"), + Q!(" mul " q!()), + Q!(" mov " "rbp, rax"), + Q!(" mov " m3!() ", rdx"), + + Q!(" mov " "rax, 0x14def9dea2f79cd6"), + Q!(" mul " q!()), + Q!(" add " m3!() ", rax"), + Q!(" adc " "rdx, 0"), + + // Load the next digit + + Q!(" mov " d!() ", [" x!() "+ 8 * " k!() "-8]"), + + // Subtract to get x' - q * (n_25519 - 2^252) = x - q * n_25519 + + Q!(" sub " d!() ", rbp"), + Q!(" sbb " m0!() ", " m3!()), + Q!(" sbb " m1!() ", rdx"), + Q!(" sbb " m2!() ", 0"), + + // Get a bitmask for the borrow and create a masked version of + // non-trivial digits of [rbx;0;rdx;rax] = n_25519, then add it. + // The masked n3 digit exploits the fact that bit 60 of n0 is set. + + Q!(" sbb " "rbx, rbx"), + + Q!(" mov " "rax, 0x5812631a5cf5d3ed"), + Q!(" and " "rax, rbx"), + Q!(" mov " "rdx, 0x14def9dea2f79cd6"), + Q!(" and " "rdx, rbx"), + Q!(" mov " "rbx, 0x1000000000000000"), + Q!(" and " "rbx, rax"), + + Q!(" add " d!() ", rax"), + Q!(" adc " m0!() ", rdx"), + Q!(" adc " m1!() ", 0"), + Q!(" adc " m2!() ", rbx"), + + // Now shuffle registers up and loop + + Q!(" mov " m3!() ", " m2!()), + Q!(" mov " m2!() ", " m1!()), + Q!(" mov " m1!() ", " m0!()), + Q!(" mov " m0!() ", " d!()), + + Q!(" dec " k!()), + Q!(" jnz " Label!("bignum_mod_n25519_loop", 4, Before)), + + // Write back + + Q!(Label!("bignum_mod_n25519_writeback", 3) ":"), + + Q!(" mov " "[" z!() "], " m0!()), + Q!(" mov " "[" z!() "+ 8], " m1!()), + Q!(" mov " "[" z!() "+ 16], " m2!()), + Q!(" mov " "[" z!() "+ 24], " m3!()), + + // Restore registers and return + + Q!(" pop " "r12"), + Q!(" pop " "rbp"), + Q!(" pop " "rbx"), + // linear hoisting in -> jmp after bignum_mod_n25519_shortinput + Q!(" jmp " Label!("hoist_finish", 5, After)), + + 
Q!(Label!("bignum_mod_n25519_shortinput", 2) ":"), + + Q!(" xor " m0!() ", " m0!()), + Q!(" xor " m1!() ", " m1!()), + Q!(" xor " m2!() ", " m2!()), + Q!(" xor " m3!() ", " m3!()), + + Q!(" test " k!() ", " k!()), + Q!(" jz " Label!("bignum_mod_n25519_writeback", 3, Before)), + Q!(" mov " m0!() ", [rdx]"), + Q!(" dec " k!()), + Q!(" jz " Label!("bignum_mod_n25519_writeback", 3, Before)), + Q!(" mov " m1!() ", [rdx + 8]"), + Q!(" dec " k!()), + Q!(" jz " Label!("bignum_mod_n25519_writeback", 3, Before)), + Q!(" mov " m2!() ", [rdx + 16]"), + Q!(" jmp " Label!("bignum_mod_n25519_writeback", 3, Before)), + Q!(Label!("hoist_finish", 5) ":"), + inout("rdi") z.as_mut_ptr() => _, + inout("rsi") x.len() => _, + inout("rdx") x.as_ptr() => _, + // clobbers + out("r10") _, + out("r11") _, + out("r12") _, + out("r8") _, + out("r9") _, + out("rax") _, + out("rcx") _, + ) + }; +} diff --git a/graviola/src/low/x86_64/bignum_neg_p25519.rs b/graviola/src/low/x86_64/bignum_neg_p25519.rs new file mode 100644 index 000000000..e48fd984b --- /dev/null +++ b/graviola/src/low/x86_64/bignum_neg_p25519.rs @@ -0,0 +1,123 @@ +// generated source. do not edit. +#![allow(non_upper_case_globals, unused_macros, unused_imports)] +use crate::low::macros::*; + +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Negate modulo p_25519, z := (-x) mod p_25519, assuming x reduced +// Input x[4]; output z[4] +// +// extern void bignum_neg_p25519(uint64_t z[static 4], const uint64_t x[static 4]); +// +// Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x +// ---------------------------------------------------------------------------- + +macro_rules! z { + () => { + "rdi" + }; +} +macro_rules! x { + () => { + "rsi" + }; +} + +macro_rules! q { + () => { + "rdx" + }; +} +macro_rules! 
n0 { + () => { + "rax" + }; +} +macro_rules! n1 { + () => { + "rcx" + }; +} +macro_rules! n2 { + () => { + "r8" + }; +} +macro_rules! n3 { + () => { + "r9" + }; +} + +macro_rules! c { + () => { + "r10" + }; +} + +macro_rules! qshort { + () => { + "esi" + }; +} + +/// Negate modulo p_25519, z := (-x) mod p_25519, assuming x reduced +/// +/// Input x[4]; output z[4] +pub(crate) fn bignum_neg_p25519(z: &mut [u64; 4], x: &[u64; 4]) { + // SAFETY: inline assembly. see [crate::low::inline_assembly_safety] for safety info. + unsafe { + core::arch::asm!( + + Q!(" endbr64 " ), + + + // Load the 4 digits of x and let q be an OR of all the digits + + Q!(" mov " n0!() ", [" x!() "]"), + Q!(" mov " q!() ", " n0!()), + Q!(" mov " n1!() ", [" x!() "+ 8]"), + Q!(" or " q!() ", " n1!()), + Q!(" mov " n2!() ", [" x!() "+ 16]"), + Q!(" or " q!() ", " n2!()), + Q!(" mov " n3!() ", [" x!() "+ 24]"), + Q!(" or " q!() ", " n3!()), + + // Turn q into a strict x <> 0 bitmask, and c into a masked constant [-19] + // so that [q;q;q;c] = [2^256 - 19], masked according to nonzeroness of x + + Q!(" neg " q!()), + Q!(" sbb " q!() ", " q!()), + Q!(" mov " c!() ", -19"), + Q!(" and " c!() ", " q!()), + + // Now just do [2^256 - 19] - x and then mask to 255 bits, + // which means in effect the required [2^255 - 19] - x + + Q!(" sub " c!() ", " n0!()), + Q!(" mov " "[" z!() "], " c!()), + Q!(" mov " c!() ", " q!()), + Q!(" sbb " c!() ", " n1!()), + Q!(" mov " "[" z!() "+ 8], " c!()), + Q!(" mov " c!() ", " q!()), + Q!(" sbb " c!() ", " n2!()), + Q!(" mov " "[" z!() "+ 16], " c!()), + Q!(" sbb " q!() ", " n3!()), + Q!(" btr " q!() ", 63"), + Q!(" mov " "[" z!() "+ 24], " q!()), + + inout("rdi") z.as_mut_ptr() => _, + inout("rsi") x.as_ptr() => _, + // clobbers + out("r10") _, + out("r8") _, + out("r9") _, + out("rax") _, + out("rcx") _, + out("rdx") _, + ) + }; +} diff --git a/graviola/src/low/x86_64/edwards25519_decode.rs b/graviola/src/low/x86_64/edwards25519_decode.rs new file mode 100644 index 
000000000..dcd768fb9 --- /dev/null +++ b/graviola/src/low/x86_64/edwards25519_decode.rs @@ -0,0 +1,710 @@ +// generated source. do not edit. +#![allow(non_upper_case_globals, unused_macros, unused_imports)] +use crate::low::macros::*; + +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Decode compressed 256-bit form of edwards25519 point +// Input c[32] (bytes); output function return and z[8] +// +// extern uint64_t edwards25519_decode(uint64_t z[static 8], const uint8_t c[static 32]); +// +// This interprets the input byte string as a little-endian number +// representing a point (x,y) on the edwards25519 curve, encoded as +// 2^255 * x_0 + y where x_0 is the least significant bit of x. It +// returns the full pair of coordinates x (at z) and y (at z+4). The +// return code is 0 for success and 1 for failure, which means that +// the input does not correspond to the encoding of any edwards25519 +// point. This can happen for three reasons, where y = the lowest +// 255 bits of the input: +// +// * y >= p_25519 +// Input y coordinate is not reduced +// * (y^2 - 1) * (1 + d_25519 * y^2) has no modular square root +// There is no x such that (x,y) is on the curve +// * y^2 = 1 and top bit of input is set +// Cannot be the canonical encoding of (0,1) or (0,-1) +// +// Standard x86-64 ABI: RDI = z, RSI = c +// Microsoft x64 ABI: RCX = z, RDX = c +// ---------------------------------------------------------------------------- + +// Size in bytes of a 64-bit word + +macro_rules! N { + () => { + "8" + }; +} + +// Pointer-offset pairs for temporaries on stack + +macro_rules! y { + () => { + "rsp + 0" + }; +} +macro_rules! s { () => { Q!("rsp + (4 * " N!() ")") } } +macro_rules! t { () => { Q!("rsp + (8 * " N!() ")") } } +macro_rules! u { () => { Q!("rsp + (12 * " N!() ")") } } +macro_rules! 
v { () => { Q!("rsp + (16 * " N!() ")") } } +macro_rules! w { () => { Q!("rsp + (20 * " N!() ")") } } +macro_rules! q { () => { Q!("rsp + (24 * " N!() ")") } } +macro_rules! res { () => { Q!("QWORD PTR [rsp + (28 * " N!() ")]") } } +macro_rules! sgnbit { () => { Q!("QWORD PTR [rsp + (29 * " N!() ")]") } } +macro_rules! badun { () => { Q!("QWORD PTR [rsp + (30 * " N!() ")]") } } + +// Total size to reserve on the stack + +macro_rules! NSPACE { () => { Q!("(32 * " N!() ")") } } + +// Corrupted versions when stack is down 8 more + +macro_rules! q8 { () => { Q!("rsp + (25 * " N!() ")") } } + +// Syntactic variants to make x86_att version simpler to generate + +macro_rules! Y { + () => { + "0" + }; +} +macro_rules! S { () => { Q!("(4 * " N!() ")") } } +macro_rules! T { () => { Q!("(8 * " N!() ")") } } +macro_rules! U { () => { Q!("(12 * " N!() ")") } } +macro_rules! V { () => { Q!("(16 * " N!() ")") } } +macro_rules! W { () => { Q!("(20 * " N!() ")") } } +macro_rules! Q8 { () => { Q!("(25 * " N!() ")") } } + +/// Decode compressed 256-bit form of edwards25519 point +/// +/// Input c[32] (bytes); output function return and z[8] +/// +/// This interprets the input byte string as a little-endian number +/// representing a point (x,y) on the edwards25519 curve, encoded as +/// 2^255 * x_0 + y where x_0 is the least significant bit of x. It +/// returns the full pair of coordinates x (at z) and y (at z+4). The +/// return code is 0 for success and 1 for failure, which means that +/// the input does not correspond to the encoding of any edwards25519 +/// point. 
This can happen for three reasons, where y = the lowest +/// 255 bits of the input: +/// +/// * y >= p_25519 +/// Input y coordinate is not reduced +/// * (y^2 - 1) * (1 + d_25519 * y^2) has no modular square root +/// There is no x such that (x,y) is on the curve +/// * y^2 = 1 and top bit of input is set +/// Cannot be the canonical encoding of (0,1) or (0,-1) +pub(crate) fn edwards25519_decode(z: &mut [u64; 8], c: &[u8; 32]) -> bool { + let ret: u64; + // SAFETY: inline assembly. see [crate::low::inline_assembly_safety] for safety info. + unsafe { + core::arch::asm!( + + Q!(" endbr64 " ), + + // In this case the Windows form literally makes a subroutine call. + // This avoids hassle arising from subroutine offsets + + + + // Save registers and make room for temporaries + + Q!(" push " "rbx"), + Q!(" push " "rbp"), + Q!(" push " "r12"), + Q!(" push " "r13"), + Q!(" push " "r14"), + Q!(" push " "r15"), + + Q!(" sub " "rsp, " NSPACE!()), + + // Save the return pointer for the end so we can overwrite rdi later + + Q!(" mov " res!() ", rdi"), + + // Load the inputs, which can be done word-wise since x86 is little-endian. + // Let y be the lowest 255 bits of the input and sgnbit the desired parity. + // If y >= p_25519 then already flag the input as invalid (badun = 1). 
+ + Q!(" mov " "rax, [rsi]"), + Q!(" mov " "[rsp + " Y!() "], rax"), + Q!(" mov " "rbx, [rsi + 8]"), + Q!(" mov " "[rsp + " Y!() "+ 8], rbx"), + Q!(" xor " "ebp, ebp"), + Q!(" mov " "rcx, [rsi + 16]"), + Q!(" mov " "[rsp + " Y!() "+ 16], rcx"), + Q!(" mov " "rdx, [rsi + 24]"), + Q!(" btr " "rdx, 63"), + Q!(" mov " "[rsp + " Y!() "+ 24], rdx"), + Q!(" adc " "rbp, rbp"), + Q!(" mov " sgnbit!() ", rbp"), + + Q!(" add " "rax, 19"), + Q!(" adc " "rbx, 0"), + Q!(" adc " "rcx, 0"), + Q!(" adc " "rdx, 0"), + Q!(" shr " "rdx, 63"), + Q!(" mov " badun!() ", rdx"), + + // u = y^2 - 1 (actually y^2 + 2^255-20, not reduced modulo) + // v = 1 + d * y^2 (not reduced modulo from the +1) + // w = u * v + + Q!(" lea " "rdi, [rsp + " V!() "]"), + Q!(" mov " "rsi, 1"), + Q!(" lea " "rdx, [rsp + " Y!() "]"), + Q!(" call " Label!("edwards25519_decode_nsqr_p25519", 2, After)), + Q!(" mov " "rax, [rsp + " V!() "]"), + Q!(" sub " "rax, 20"), + Q!(" mov " "rbx, [rsp + " V!() "+ 8]"), + Q!(" sbb " "rbx, 0"), + Q!(" mov " "rcx, [rsp + " V!() "+ 16]"), + Q!(" sbb " "rcx, 0"), + Q!(" mov " "rdx, [rsp + " V!() "+ 24]"), + Q!(" sbb " "rdx, 0"), + Q!(" btc " "rdx, 63"), + Q!(" mov " "[rsp + " U!() "], rax"), + Q!(" mov " "[rsp + " U!() "+ 8], rbx"), + Q!(" mov " "[rsp + " U!() "+ 16], rcx"), + Q!(" mov " "[rsp + " U!() "+ 24], rdx"), + + Q!(" mov " "rax, 0x75eb4dca135978a3"), + Q!(" mov " "[rsp + " W!() "], rax"), + Q!(" mov " "rax, 0x00700a4d4141d8ab"), + Q!(" mov " "[rsp + " W!() "+ 8], rax"), + Q!(" mov " "rax, 0x8cc740797779e898"), + Q!(" mov " "[rsp + " W!() "+ 16], rax"), + Q!(" mov " "rax, 0x52036cee2b6ffe73"), + Q!(" mov " "[rsp + " W!() "+ 24], rax"), + Q!(" lea " "rdi, [rsp + " V!() "]"), + Q!(" lea " "rsi, [rsp + " W!() "]"), + Q!(" lea " "rdx, [rsp + " V!() "]"), + Q!(" call " Label!("edwards25519_decode_mul_p25519", 3, After)), + Q!(" mov " "rax, [rsp + " V!() "]"), + Q!(" add " "rax, 1"), + Q!(" mov " "rbx, [rsp + " V!() "+ 8]"), + Q!(" adc " "rbx, 0"), + Q!(" mov " "rcx, [rsp + " V!() 
"+ 16]"), + Q!(" adc " "rcx, 0"), + Q!(" mov " "rdx, [rsp + " V!() "+ 24]"), + Q!(" adc " "rdx, 0"), + Q!(" mov " "[rsp + " V!() "], rax"), + Q!(" mov " "[rsp + " V!() "+ 8], rbx"), + Q!(" mov " "[rsp + " V!() "+ 16], rcx"), + Q!(" mov " "[rsp + " V!() "+ 24], rdx"), + + Q!(" lea " "rdi, [rsp + " W!() "]"), + Q!(" lea " "rsi, [rsp + " U!() "]"), + Q!(" lea " "rdx, [rsp + " V!() "]"), + Q!(" call " Label!("edwards25519_decode_mul_p25519", 3, After)), + + // Get s = w^{2^252-3} as a candidate inverse square root 1/sqrt(w). + // This power tower computation is the same as bignum_invsqrt_p25519 + + Q!(" lea " "rdi, [rsp + " T!() "]"), + Q!(" mov " "rsi, 1"), + Q!(" lea " "rdx, [rsp + " W!() "]"), + Q!(" call " Label!("edwards25519_decode_nsqr_p25519", 2, After)), + + Q!(" lea " "rdi, [rsp + " T!() "]"), + Q!(" lea " "rsi, [rsp + " T!() "]"), + Q!(" lea " "rdx, [rsp + " W!() "]"), + Q!(" call " Label!("edwards25519_decode_mul_p25519", 3, After)), + + Q!(" lea " "rdi, [rsp + " S!() "]"), + Q!(" mov " "rsi, 2"), + Q!(" lea " "rdx, [rsp + " T!() "]"), + Q!(" call " Label!("edwards25519_decode_nsqr_p25519", 2, After)), + + Q!(" lea " "rdi, [rsp + " T!() "]"), + Q!(" lea " "rsi, [rsp + " S!() "]"), + Q!(" lea " "rdx, [rsp + " T!() "]"), + Q!(" call " Label!("edwards25519_decode_mul_p25519", 3, After)), + + Q!(" lea " "rdi, [rsp + " S!() "]"), + Q!(" mov " "rsi, 1"), + Q!(" lea " "rdx, [rsp + " T!() "]"), + Q!(" call " Label!("edwards25519_decode_nsqr_p25519", 2, After)), + + Q!(" lea " "rdi, [rsp + " V!() "]"), + Q!(" lea " "rsi, [rsp + " S!() "]"), + Q!(" lea " "rdx, [rsp + " W!() "]"), + Q!(" call " Label!("edwards25519_decode_mul_p25519", 3, After)), + + Q!(" lea " "rdi, [rsp + " S!() "]"), + Q!(" mov " "rsi, 5"), + Q!(" lea " "rdx, [rsp + " V!() "]"), + Q!(" call " Label!("edwards25519_decode_nsqr_p25519", 2, After)), + + Q!(" lea " "rdi, [rsp + " T!() "]"), + Q!(" lea " "rsi, [rsp + " S!() "]"), + Q!(" lea " "rdx, [rsp + " V!() "]"), + Q!(" call " 
Label!("edwards25519_decode_mul_p25519", 3, After)), + + Q!(" lea " "rdi, [rsp + " S!() "]"), + Q!(" mov " "rsi, 10"), + Q!(" lea " "rdx, [rsp + " T!() "]"), + Q!(" call " Label!("edwards25519_decode_nsqr_p25519", 2, After)), + + Q!(" lea " "rdi, [rsp + " T!() "]"), + Q!(" lea " "rsi, [rsp + " S!() "]"), + Q!(" lea " "rdx, [rsp + " T!() "]"), + Q!(" call " Label!("edwards25519_decode_mul_p25519", 3, After)), + + Q!(" lea " "rdi, [rsp + " S!() "]"), + Q!(" mov " "rsi, 5"), + Q!(" lea " "rdx, [rsp + " T!() "]"), + Q!(" call " Label!("edwards25519_decode_nsqr_p25519", 2, After)), + + Q!(" lea " "rdi, [rsp + " V!() "]"), + Q!(" lea " "rsi, [rsp + " S!() "]"), + Q!(" lea " "rdx, [rsp + " V!() "]"), + Q!(" call " Label!("edwards25519_decode_mul_p25519", 3, After)), + + Q!(" lea " "rdi, [rsp + " S!() "]"), + Q!(" mov " "rsi, 25"), + Q!(" lea " "rdx, [rsp + " V!() "]"), + Q!(" call " Label!("edwards25519_decode_nsqr_p25519", 2, After)), + + Q!(" lea " "rdi, [rsp + " T!() "]"), + Q!(" lea " "rsi, [rsp + " S!() "]"), + Q!(" lea " "rdx, [rsp + " V!() "]"), + Q!(" call " Label!("edwards25519_decode_mul_p25519", 3, After)), + + Q!(" lea " "rdi, [rsp + " S!() "]"), + Q!(" mov " "rsi, 50"), + Q!(" lea " "rdx, [rsp + " T!() "]"), + Q!(" call " Label!("edwards25519_decode_nsqr_p25519", 2, After)), + + Q!(" lea " "rdi, [rsp + " T!() "]"), + Q!(" lea " "rsi, [rsp + " S!() "]"), + Q!(" lea " "rdx, [rsp + " T!() "]"), + Q!(" call " Label!("edwards25519_decode_mul_p25519", 3, After)), + + Q!(" lea " "rdi, [rsp + " S!() "]"), + Q!(" mov " "rsi, 25"), + Q!(" lea " "rdx, [rsp + " T!() "]"), + Q!(" call " Label!("edwards25519_decode_nsqr_p25519", 2, After)), + + Q!(" lea " "rdi, [rsp + " V!() "]"), + Q!(" lea " "rsi, [rsp + " S!() "]"), + Q!(" lea " "rdx, [rsp + " V!() "]"), + Q!(" call " Label!("edwards25519_decode_mul_p25519", 3, After)), + + Q!(" lea " "rdi, [rsp + " S!() "]"), + Q!(" mov " "rsi, 125"), + Q!(" lea " "rdx, [rsp + " V!() "]"), + Q!(" call " 
Label!("edwards25519_decode_nsqr_p25519", 2, After)), + + Q!(" lea " "rdi, [rsp + " V!() "]"), + Q!(" lea " "rsi, [rsp + " S!() "]"), + Q!(" lea " "rdx, [rsp + " V!() "]"), + Q!(" call " Label!("edwards25519_decode_mul_p25519", 3, After)), + + Q!(" lea " "rdi, [rsp + " S!() "]"), + Q!(" mov " "rsi, 2"), + Q!(" lea " "rdx, [rsp + " V!() "]"), + Q!(" call " Label!("edwards25519_decode_nsqr_p25519", 2, After)), + + Q!(" lea " "rdi, [rsp + " S!() "]"), + Q!(" lea " "rsi, [rsp + " S!() "]"), + Q!(" lea " "rdx, [rsp + " W!() "]"), + Q!(" call " Label!("edwards25519_decode_mul_p25519", 3, After)), + + // Compute v' = s^2 * w to discriminate whether the square root sqrt(u/v) + // exists, in which case we should get 0, 1 or -1. + + Q!(" lea " "rdi, [rsp + " V!() "]"), + Q!(" mov " "rsi, 1"), + Q!(" lea " "rdx, [rsp + " S!() "]"), + Q!(" call " Label!("edwards25519_decode_nsqr_p25519", 2, After)), + + Q!(" lea " "rdi, [rsp + " V!() "]"), + Q!(" lea " "rsi, [rsp + " V!() "]"), + Q!(" lea " "rdx, [rsp + " W!() "]"), + Q!(" call " Label!("edwards25519_decode_mul_p25519", 3, After)), + + // Get the two candidates for sqrt(u / v), one being s = u * w^{2^252-3} + // and the other being t = s * j_25519 where j_25519 = sqrt(-1). 
+ + Q!(" lea " "rdi, [rsp + " S!() "]"), + Q!(" lea " "rsi, [rsp + " U!() "]"), + Q!(" lea " "rdx, [rsp + " S!() "]"), + Q!(" call " Label!("edwards25519_decode_mul_p25519", 3, After)), + Q!(" mov " "rax, 0xc4ee1b274a0ea0b0"), + Q!(" mov " "[rsp + " T!() "], rax"), + Q!(" mov " "rax, 0x2f431806ad2fe478"), + Q!(" mov " "[rsp + " T!() "+ 8], rax"), + Q!(" mov " "rax, 0x2b4d00993dfbd7a7"), + Q!(" mov " "[rsp + " T!() "+ 16], rax"), + Q!(" mov " "rax, 0x2b8324804fc1df0b"), + Q!(" mov " "[rsp + " T!() "+ 24], rax"), + Q!(" lea " "rdi, [rsp + " T!() "]"), + Q!(" lea " "rsi, [rsp + " S!() "]"), + Q!(" lea " "rdx, [rsp + " T!() "]"), + Q!(" call " Label!("edwards25519_decode_mul_p25519", 3, After)), + + // rax = 0 <=> s^2 * w = 0 or 1 + + Q!(" mov " "r8, [rsp + " V!() "]"), + Q!(" mov " "r9, [rsp + " V!() "+ 8]"), + Q!(" mov " "r10, [rsp + " V!() "+ 16]"), + Q!(" mov " "r11, [rsp + " V!() "+ 24]"), + Q!(" mov " "eax, 1"), + Q!(" not " "rax"), + Q!(" and " "rax, r8"), + Q!(" or " "rax, r9"), + Q!(" or " "rax, r10"), + Q!(" or " "rax, r11"), + + // r8 = 0 <=> s^2 * w = -1 (mod p_25519, i.e. 
s^2 * w = 2^255 - 20) + + Q!(" add " "r8, 20"), + Q!(" not " "r9"), + Q!(" not " "r10"), + Q!(" bts " "r11, 63"), + Q!(" add " "r11, 1"), + Q!(" or " "r8, r9"), + Q!(" or " "r10, r11"), + Q!(" or " "r8, r10"), + + // If s^2 * w is not 0 or 1 then replace s by t + + Q!(" test " "rax, rax"), + + Q!(" mov " "r12, [rsp + " S!() "]"), + Q!(" mov " "rbx, [rsp + " T!() "]"), + Q!(" cmovnz " "r12, rbx"), + Q!(" mov " "r13, [rsp + " S!() "+ 8]"), + Q!(" mov " "rbx, [rsp + " T!() "+ 8]"), + Q!(" cmovnz " "r13, rbx"), + Q!(" mov " "r14, [rsp + " S!() "+ 16]"), + Q!(" mov " "rbx, [rsp + " T!() "+ 16]"), + Q!(" cmovnz " "r14, rbx"), + Q!(" mov " "r15, [rsp + " S!() "+ 24]"), + Q!(" mov " "rbx, [rsp + " T!() "+ 24]"), + Q!(" cmovnz " "r15, rbx"), + Q!(" mov " "[rsp + " S!() "], r12"), + Q!(" mov " "[rsp + " S!() "+ 8], r13"), + Q!(" mov " "[rsp + " S!() "+ 16], r14"), + Q!(" mov " "[rsp + " S!() "+ 24], r15"), + + // Check invalidity, occurring if s^2 * w is not in {0,1,-1} + + Q!(" cmovz " "r8, rax"), + Q!(" neg " "r8"), + Q!(" sbb " "r8, r8"), + Q!(" neg " "r8"), + Q!(" or " badun!() ", r8"), + + // Let [r11;r10;r9;r8] = s and [r15;r14;r13;r12] = p_25519 - s + + Q!(" mov " "r8, [rsp + " S!() "]"), + Q!(" mov " "r12, -19"), + Q!(" sub " "r12, r8"), + Q!(" mov " "r9, [rsp + " S!() "+ 8]"), + Q!(" mov " "r13, -1"), + Q!(" sbb " "r13, r9"), + Q!(" mov " "r10, [rsp + " S!() "+ 16]"), + Q!(" mov " "r14, -1"), + Q!(" sbb " "r14, r10"), + Q!(" mov " "r11, [rsp + " S!() "+ 24]"), + Q!(" mov " "r15, 0x7FFFFFFFFFFFFFFF"), + Q!(" sbb " "r15, r11"), + + // Decide whether a flip is apparently indicated, s_0 <=> sgnbit + // Decide also if s = 0 by OR-ing its digits. 
Now if a flip is indicated: + // - if s = 0 then mark as invalid + // - if s <> 0 then indeed flip + + Q!(" mov " "ecx, 1"), + Q!(" and " "rcx, r8"), + Q!(" xor " "rcx, " sgnbit!()), + Q!(" mov " "rdx, " badun!()), + Q!(" mov " "rsi, rdx"), + Q!(" or " "rdx, rcx"), + Q!(" xor " "ebp, ebp"), + Q!(" mov " "rax, r8"), + Q!(" mov " "rbx, r9"), + Q!(" or " "rax, r10"), + Q!(" or " "rbx, r11"), + Q!(" or " "rax, rbx"), + Q!(" cmovz " "rcx, rbp"), + Q!(" cmovnz " "rdx, rsi"), + + // Actual selection of x as s or -s, copying of y and return of validity + + Q!(" test " "rcx, rcx"), + + Q!(" cmovnz " "r8, r12"), + Q!(" cmovnz " "r9, r13"), + Q!(" cmovnz " "r10, r14"), + Q!(" cmovnz " "r11, r15"), + + Q!(" mov " "rdi, " res!()), + Q!(" mov " "[rdi], r8"), + Q!(" mov " "[rdi + 8], r9"), + Q!(" mov " "[rdi + 16], r10"), + Q!(" mov " "[rdi + 24], r11"), + Q!(" mov " "rcx, [rsp + " Y!() "]"), + Q!(" mov " "[rdi + 32], rcx"), + Q!(" mov " "rcx, [rsp + " Y!() "+ 8]"), + Q!(" mov " "[rdi + 40], rcx"), + Q!(" mov " "rcx, [rsp + " Y!() "+ 16]"), + Q!(" mov " "[rdi + 48], rcx"), + Q!(" mov " "rcx, [rsp + " Y!() "+ 24]"), + Q!(" mov " "[rdi + 56], rcx"), + + Q!(" mov " "rax, rdx"), + + // Restore stack and registers + + Q!(" add " "rsp, " NSPACE!()), + + Q!(" pop " "r15"), + Q!(" pop " "r14"), + Q!(" pop " "r13"), + Q!(" pop " "r12"), + Q!(" pop " "rbp"), + Q!(" pop " "rbx"), + // proc hoisting in -> ret after edwards25519_decode_loop + Q!(" jmp " Label!("hoist_finish", 4, After)), + + // ************************************************************* + // Local z = x * y + // ************************************************************* + + Q!(Label!("edwards25519_decode_mul_p25519", 3) ":"), + Q!(" mov " "rcx, rdx"), + Q!(" xor " "ebp, ebp"), + Q!(" mov " "rdx, [rcx]"), + Q!(" mulx " "r9, r8, [rsi]"), + Q!(" mulx " "r10, rax, [rsi + 0x8]"), + Q!(" add " "r9, rax"), + Q!(" mulx " "r11, rax, [rsi + 0x10]"), + Q!(" adc " "r10, rax"), + Q!(" mulx " "r12, rax, [rsi + 0x18]"), + Q!(" adc " 
"r11, rax"), + Q!(" adc " "r12, rbp"), + Q!(" xor " "ebp, ebp"), + Q!(" mov " "rdx, [rcx + 0x8]"), + Q!(" mulx " "rbx, rax, [rsi]"), + Q!(" adcx " "r9, rax"), + Q!(" adox " "r10, rbx"), + Q!(" mulx " "rbx, rax, [rsi + 0x8]"), + Q!(" adcx " "r10, rax"), + Q!(" adox " "r11, rbx"), + Q!(" mulx " "rbx, rax, [rsi + 0x10]"), + Q!(" adcx " "r11, rax"), + Q!(" adox " "r12, rbx"), + Q!(" mulx " "r13, rax, [rsi + 0x18]"), + Q!(" adcx " "r12, rax"), + Q!(" adox " "r13, rbp"), + Q!(" adc " "r13, rbp"), + Q!(" xor " "ebp, ebp"), + Q!(" mov " "rdx, [rcx + 0x10]"), + Q!(" mulx " "rbx, rax, [rsi]"), + Q!(" adcx " "r10, rax"), + Q!(" adox " "r11, rbx"), + Q!(" mulx " "rbx, rax, [rsi + 0x8]"), + Q!(" adcx " "r11, rax"), + Q!(" adox " "r12, rbx"), + Q!(" mulx " "rbx, rax, [rsi + 0x10]"), + Q!(" adcx " "r12, rax"), + Q!(" adox " "r13, rbx"), + Q!(" mulx " "r14, rax, [rsi + 0x18]"), + Q!(" adcx " "r13, rax"), + Q!(" adox " "r14, rbp"), + Q!(" adc " "r14, rbp"), + Q!(" xor " "ebp, ebp"), + Q!(" mov " "rdx, [rcx + 0x18]"), + Q!(" mulx " "rbx, rax, [rsi]"), + Q!(" adcx " "r11, rax"), + Q!(" adox " "r12, rbx"), + Q!(" mulx " "r15, rcx, [rsi + 0x18]"), + Q!(" mulx " "rbx, rax, [rsi + 0x8]"), + Q!(" adcx " "r12, rax"), + Q!(" adox " "r13, rbx"), + Q!(" mulx " "rbx, rax, [rsi + 0x10]"), + Q!(" adcx " "r13, rax"), + Q!(" adox " "r14, rbx"), + Q!(" mov " "edx, 0x26"), + Q!(" mulx " "rbx, rax, r15"), + Q!(" adcx " "r14, rcx"), + Q!(" adox " "r15, rbp"), + Q!(" adc " "r15, rbp"), + Q!(" add " "rax, r11"), + Q!(" adc " "rbx, rbp"), + Q!(" bt " "rax, 0x3f"), + Q!(" adc " "rbx, rbx"), + Q!(" lea " "rcx, [rbx + 0x1]"), + Q!(" imul " "rcx, rcx, 0x13"), + Q!(" xor " "ebp, ebp"), + Q!(" adox " "r8, rcx"), + Q!(" mulx " "rbx, rax, r12"), + Q!(" adcx " "r8, rax"), + Q!(" adox " "r9, rbx"), + Q!(" mulx " "rbx, rax, r13"), + Q!(" adcx " "r9, rax"), + Q!(" adox " "r10, rbx"), + Q!(" mulx " "rbx, rax, r14"), + Q!(" adcx " "r10, rax"), + Q!(" adox " "r11, rbx"), + Q!(" mulx " "rbx, rax, r15"), + Q!(" adc " 
"r11, rax"), + Q!(" shl " "rcx, 0x3f"), + Q!(" cmp " "r11, rcx"), + Q!(" mov " "eax, 0x13"), + Q!(" cmovns " "rax, rbp"), + Q!(" sub " "r8, rax"), + Q!(" sbb " "r9, rbp"), + Q!(" sbb " "r10, rbp"), + Q!(" sbb " "r11, rbp"), + Q!(" btr " "r11, 0x3f"), + Q!(" mov " "[rdi], r8"), + Q!(" mov " "[rdi + 0x8], r9"), + Q!(" mov " "[rdi + 0x10], r10"), + Q!(" mov " "[rdi + 0x18], r11"), + Q!(" ret " ), + + // ************************************************************* + // Local z = x^(2^n), i.e. n successive squarings of x + // ************************************************************* + + Q!(Label!("edwards25519_decode_nsqr_p25519", 2) ":"), + + // Copy input argument into q + + Q!(" mov " "rax, [rdx]"), + Q!(" mov " "rbx, [rdx + 8]"), + Q!(" mov " "rcx, [rdx + 16]"), + Q!(" mov " "rdx, [rdx + 24]"), + Q!(" mov " "[rsp + " Q8!() "], rax"), + Q!(" mov " "[rsp + " Q8!() "+ 8], rbx"), + Q!(" mov " "[rsp + " Q8!() "+ 16], rcx"), + Q!(" mov " "[rsp + " Q8!() "+ 24], rdx"), + + // Main squaring loop, accumulating in q consistently and + // only ensuring the intermediates are < 2 * p_25519 = 2^256 - 38 + + Q!(Label!("edwards25519_decode_loop", 5) ":"), + Q!(" mov " "rdx, [rsp + " Q8!() "]"), + Q!(" mulx " "r15, r8, rdx"), + Q!(" mulx " "r10, r9, [rsp + " Q8!() "+ 0x8]"), + Q!(" mulx " "r12, r11, [rsp + " Q8!() "+ 0x18]"), + Q!(" mov " "rdx, [rsp + " Q8!() "+ 0x10]"), + Q!(" mulx " "r14, r13, [rsp + " Q8!() "+ 0x18]"), + Q!(" xor " "ebx, ebx"), + Q!(" mulx " "rcx, rax, [rsp + " Q8!() "]"), + Q!(" adcx " "r10, rax"), + Q!(" adox " "r11, rcx"), + Q!(" mulx " "rcx, rax, [rsp + " Q8!() "+ 0x8]"), + Q!(" adcx " "r11, rax"), + Q!(" adox " "r12, rcx"), + Q!(" mov " "rdx, [rsp + " Q8!() "+ 0x18]"), + Q!(" mulx " "rcx, rax, [rsp + " Q8!() "+ 0x8]"), + Q!(" adcx " "r12, rax"), + Q!(" adox " "r13, rcx"), + Q!(" adcx " "r13, rbx"), + Q!(" adox " "r14, rbx"), + Q!(" adc " "r14, rbx"), + Q!(" xor " "ebx, ebx"), + Q!(" adcx " "r9, r9"), + Q!(" adox " "r9, r15"), + Q!(" mov " "rdx, [rsp + " Q8!() "+ 0x8]"), + Q!(" mulx " 
"rdx, rax, rdx"), + Q!(" adcx " "r10, r10"), + Q!(" adox " "r10, rax"), + Q!(" adcx " "r11, r11"), + Q!(" adox " "r11, rdx"), + Q!(" mov " "rdx, [rsp + " Q8!() "+ 0x10]"), + Q!(" mulx " "rdx, rax, rdx"), + Q!(" adcx " "r12, r12"), + Q!(" adox " "r12, rax"), + Q!(" adcx " "r13, r13"), + Q!(" adox " "r13, rdx"), + Q!(" mov " "rdx, [rsp + " Q8!() "+ 0x18]"), + Q!(" mulx " "r15, rax, rdx"), + Q!(" adcx " "r14, r14"), + Q!(" adox " "r14, rax"), + Q!(" adcx " "r15, rbx"), + Q!(" adox " "r15, rbx"), + Q!(" mov " "edx, 0x26"), + Q!(" xor " "ebx, ebx"), + Q!(" mulx " "rcx, rax, r12"), + Q!(" adcx " "r8, rax"), + Q!(" adox " "r9, rcx"), + Q!(" mulx " "rcx, rax, r13"), + Q!(" adcx " "r9, rax"), + Q!(" adox " "r10, rcx"), + Q!(" mulx " "rcx, rax, r14"), + Q!(" adcx " "r10, rax"), + Q!(" adox " "r11, rcx"), + Q!(" mulx " "r12, rax, r15"), + Q!(" adcx " "r11, rax"), + Q!(" adox " "r12, rbx"), + Q!(" adcx " "r12, rbx"), + Q!(" shld " "r12, r11, 0x1"), + Q!(" btr " "r11, 0x3f"), + Q!(" mov " "edx, 0x13"), + Q!(" imul " "rdx, r12"), + Q!(" add " "r8, rdx"), + Q!(" adc " "r9, rbx"), + Q!(" adc " "r10, rbx"), + Q!(" adc " "r11, rbx"), + Q!(" mov " "[rsp + " Q8!() "], r8"), + Q!(" mov " "[rsp + " Q8!() "+ 0x8], r9"), + Q!(" mov " "[rsp + " Q8!() "+ 0x10], r10"), + Q!(" mov " "[rsp + " Q8!() "+ 0x18], r11"), + + // Loop as applicable + + Q!(" dec " "rsi"), + Q!(" jnz " Label!("edwards25519_decode_loop", 5, Before)), + + // We know the intermediate result x < 2^256 - 38, and now we do strict + // modular reduction mod 2^255 - 19. Note x < 2^255 - 19 <=> x + 19 < 2^255 + // which is equivalent to a "ns" condition. We just use the results where + // they were in registers [r11;r10;r9;r8] instead of re-loading them. 
+ + Q!(" mov " "eax, 19"), + Q!(" xor " "ebx, ebx"), + Q!(" xor " "ecx, ecx"), + Q!(" xor " "edx, edx"), + Q!(" add " "rax, r8"), + Q!(" adc " "rbx, r9"), + Q!(" adc " "rcx, r10"), + Q!(" adc " "rdx, r11"), + + Q!(" cmovns " "rax, r8"), + Q!(" cmovns " "rbx, r9"), + Q!(" cmovns " "rcx, r10"), + Q!(" cmovns " "rdx, r11"), + Q!(" btr " "rdx, 63"), + Q!(" mov " "[rdi], rax"), + Q!(" mov " "[rdi + 8], rbx"), + Q!(" mov " "[rdi + 16], rcx"), + Q!(" mov " "[rdi + 24], rdx"), + Q!(" ret " ), + Q!(Label!("hoist_finish", 4) ":"), + inout("rdi") z.as_mut_ptr() => _, + inout("rsi") c.as_ptr() => _, + out("rax") ret, + // clobbers + out("r10") _, + out("r11") _, + out("r12") _, + out("r13") _, + out("r14") _, + out("r15") _, + out("r8") _, + out("r9") _, + out("rcx") _, + out("rdx") _, + ) + }; + ret == 0 +} diff --git a/graviola/src/low/x86_64/edwards25519_scalarmulbase.rs b/graviola/src/low/x86_64/edwards25519_scalarmulbase.rs new file mode 100644 index 000000000..54fdca5c7 --- /dev/null +++ b/graviola/src/low/x86_64/edwards25519_scalarmulbase.rs @@ -0,0 +1,8936 @@ +// generated source. do not edit. +#![allow(non_upper_case_globals, unused_macros, unused_imports)] +use crate::low::macros::*; + +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Scalar multiplication for the edwards25519 standard basepoint +// Input scalar[4]; output res[8] +// +// extern void edwards25519_scalarmulbase +// (uint64_t res[static 8],const uint64_t scalar[static 4]); +// +// Given a scalar n, returns point (X,Y) = n * B where B = (...,4/5) is +// the standard basepoint for the edwards25519 (Ed25519) curve. 
+// +// Standard x86-64 ABI: RDI = res, RSI = scalar +// Microsoft x64 ABI: RCX = res, RDX = scalar +// ---------------------------------------------------------------------------- + +// Size of individual field elements + +macro_rules! NUMSIZE { + () => { + "32" + }; +} + +// Pointer-offset pairs for result and temporaries on stack with some aliasing. +// The result "resx" assumes the "res" pointer has been preloaded into rbp. + +macro_rules! resx { () => { Q!("rbp + (0 * " NUMSIZE!() ")") } } +macro_rules! resy { () => { Q!("rbp + (1 * " NUMSIZE!() ")") } } + +macro_rules! scalar { () => { Q!("rsp + (0 * " NUMSIZE!() ")") } } + +macro_rules! tabent { () => { Q!("rsp + (1 * " NUMSIZE!() ")") } } +macro_rules! ymx_2 { () => { Q!("rsp + (1 * " NUMSIZE!() ")") } } +macro_rules! xpy_2 { () => { Q!("rsp + (2 * " NUMSIZE!() ")") } } +macro_rules! kxy_2 { () => { Q!("rsp + (3 * " NUMSIZE!() ")") } } + +macro_rules! t0 { () => { Q!("rsp + (4 * " NUMSIZE!() ")") } } +macro_rules! t1 { () => { Q!("rsp + (5 * " NUMSIZE!() ")") } } +macro_rules! t2 { () => { Q!("rsp + (6 * " NUMSIZE!() ")") } } +macro_rules! t3 { () => { Q!("rsp + (7 * " NUMSIZE!() ")") } } +macro_rules! t4 { () => { Q!("rsp + (8 * " NUMSIZE!() ")") } } +macro_rules! t5 { () => { Q!("rsp + (9 * " NUMSIZE!() ")") } } + +macro_rules! acc { () => { Q!("rsp + (10 * " NUMSIZE!() ")") } } +macro_rules! x_1 { () => { Q!("rsp + (10 * " NUMSIZE!() ")") } } +macro_rules! y_1 { () => { Q!("rsp + (11 * " NUMSIZE!() ")") } } +macro_rules! z_1 { () => { Q!("rsp + (12 * " NUMSIZE!() ")") } } +macro_rules! w_1 { () => { Q!("rsp + (13 * " NUMSIZE!() ")") } } +macro_rules! x_3 { () => { Q!("rsp + (10 * " NUMSIZE!() ")") } } +macro_rules! y_3 { () => { Q!("rsp + (11 * " NUMSIZE!() ")") } } +macro_rules! z_3 { () => { Q!("rsp + (12 * " NUMSIZE!() ")") } } +macro_rules! w_3 { () => { Q!("rsp + (13 * " NUMSIZE!() ")") } } + +// Stable homes for the input result pointer, and other variables + +macro_rules! 
res { () => { Q!("QWORD PTR [rsp + 14 * " NUMSIZE!() "]") } } + +macro_rules! i { () => { Q!("QWORD PTR [rsp + 14 * " NUMSIZE!() "+ 8]") } } + +macro_rules! bias { () => { Q!("QWORD PTR [rsp + 14 * " NUMSIZE!() "+ 16]") } } + +macro_rules! bf { () => { Q!("QWORD PTR [rsp + 14 * " NUMSIZE!() "+ 24]") } } +macro_rules! ix { () => { Q!("QWORD PTR [rsp + 14 * " NUMSIZE!() "+ 24]") } } + +macro_rules! tab { () => { Q!("QWORD PTR [rsp + 15 * " NUMSIZE!() "]") } } + +// Total size to reserve on the stack + +macro_rules! NSPACE { () => { Q!("(15 * " NUMSIZE!() "+ 8)") } } + +// Syntactic variants to make x86_att version simpler to generate + +macro_rules! SCALAR { + () => { + "0" + }; +} +macro_rules! TABENT { () => { Q!("(1 * " NUMSIZE!() ")") } } +macro_rules! ACC { () => { Q!("(10 * " NUMSIZE!() ")") } } +macro_rules! X3 { () => { Q!("(10 * " NUMSIZE!() ")") } } +macro_rules! Z3 { () => { Q!("(12 * " NUMSIZE!() ")") } } +macro_rules! W3 { () => { Q!("(13 * " NUMSIZE!() ")") } } + +// Macro wrapping up the basic field multiplication, only trivially +// different from a pure function call to bignum_mul_p25519. + +macro_rules! 
mul_p25519 { + ($P0:expr, $P1:expr, $P2:expr) => { Q!( + "xor esi, esi;\n" + "mov rdx, [" $P2 "];\n" + "mulx r9, r8, [" $P1 "];\n" + "mulx r10, rax, [" $P1 "+ 0x8];\n" + "add r9, rax;\n" + "mulx r11, rax, [" $P1 "+ 0x10];\n" + "adc r10, rax;\n" + "mulx r12, rax, [" $P1 "+ 0x18];\n" + "adc r11, rax;\n" + "adc r12, rsi;\n" + "xor esi, esi;\n" + "mov rdx, [" $P2 "+ 0x8];\n" + "mulx rbx, rax, [" $P1 "];\n" + "adcx r9, rax;\n" + "adox r10, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x8];\n" + "adcx r10, rax;\n" + "adox r11, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x10];\n" + "adcx r11, rax;\n" + "adox r12, rbx;\n" + "mulx r13, rax, [" $P1 "+ 0x18];\n" + "adcx r12, rax;\n" + "adox r13, rsi;\n" + "adcx r13, rsi;\n" + "xor esi, esi;\n" + "mov rdx, [" $P2 "+ 0x10];\n" + "mulx rbx, rax, [" $P1 "];\n" + "adcx r10, rax;\n" + "adox r11, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x8];\n" + "adcx r11, rax;\n" + "adox r12, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x10];\n" + "adcx r12, rax;\n" + "adox r13, rbx;\n" + "mulx r14, rax, [" $P1 "+ 0x18];\n" + "adcx r13, rax;\n" + "adox r14, rsi;\n" + "adcx r14, rsi;\n" + "xor esi, esi;\n" + "mov rdx, [" $P2 "+ 0x18];\n" + "mulx rbx, rax, [" $P1 "];\n" + "adcx r11, rax;\n" + "adox r12, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x8];\n" + "adcx r12, rax;\n" + "adox r13, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x10];\n" + "adcx r13, rax;\n" + "adox r14, rbx;\n" + "mulx r15, rax, [" $P1 "+ 0x18];\n" + "adcx r14, rax;\n" + "adox r15, rsi;\n" + "adcx r15, rsi;\n" + "mov edx, 0x26;\n" + "xor esi, esi;\n" + "mulx rbx, rax, r12;\n" + "adcx r8, rax;\n" + "adox r9, rbx;\n" + "mulx rbx, rax, r13;\n" + "adcx r9, rax;\n" + "adox r10, rbx;\n" + "mulx rbx, rax, r14;\n" + "adcx r10, rax;\n" + "adox r11, rbx;\n" + "mulx r12, rax, r15;\n" + "adcx r11, rax;\n" + "adox r12, rsi;\n" + "adcx r12, rsi;\n" + "shld r12, r11, 0x1;\n" + "mov edx, 0x13;\n" + "inc r12;\n" + "bts r11, 63;\n" + "mulx rbx, rax, r12;\n" + "add r8, rax;\n" + "adc r9, rbx;\n" + "adc r10, rsi;\n" + "adc r11, rsi;\n" + 
"sbb rax, rax;\n" + "not rax;\n" + "and rax, rdx;\n" + "sub r8, rax;\n" + "sbb r9, rsi;\n" + "sbb r10, rsi;\n" + "sbb r11, rsi;\n" + "btr r11, 63;\n" + "mov [" $P0 "], r8;\n" + "mov [" $P0 "+ 0x8], r9;\n" + "mov [" $P0 "+ 0x10], r10;\n" + "mov [" $P0 "+ 0x18], r11" + )} +} + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. + +macro_rules! mul_4 { + ($P0:expr, $P1:expr, $P2:expr) => { Q!( + "xor ecx, ecx;\n" + "mov rdx, [" $P2 "];\n" + "mulx r9, r8, [" $P1 "];\n" + "mulx r10, rax, [" $P1 "+ 0x8];\n" + "add r9, rax;\n" + "mulx r11, rax, [" $P1 "+ 0x10];\n" + "adc r10, rax;\n" + "mulx r12, rax, [" $P1 "+ 0x18];\n" + "adc r11, rax;\n" + "adc r12, rcx;\n" + "xor ecx, ecx;\n" + "mov rdx, [" $P2 "+ 0x8];\n" + "mulx rbx, rax, [" $P1 "];\n" + "adcx r9, rax;\n" + "adox r10, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x8];\n" + "adcx r10, rax;\n" + "adox r11, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x10];\n" + "adcx r11, rax;\n" + "adox r12, rbx;\n" + "mulx r13, rax, [" $P1 "+ 0x18];\n" + "adcx r12, rax;\n" + "adox r13, rcx;\n" + "adcx r13, rcx;\n" + "xor ecx, ecx;\n" + "mov rdx, [" $P2 "+ 0x10];\n" + "mulx rbx, rax, [" $P1 "];\n" + "adcx r10, rax;\n" + "adox r11, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x8];\n" + "adcx r11, rax;\n" + "adox r12, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x10];\n" + "adcx r12, rax;\n" + "adox r13, rbx;\n" + "mulx r14, rax, [" $P1 "+ 0x18];\n" + "adcx r13, rax;\n" + "adox r14, rcx;\n" + "adcx r14, rcx;\n" + "xor ecx, ecx;\n" + "mov rdx, [" $P2 "+ 0x18];\n" + "mulx rbx, rax, [" $P1 "];\n" + "adcx r11, rax;\n" + "adox r12, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x8];\n" + "adcx r12, rax;\n" + "adox r13, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x10];\n" + "adcx r13, rax;\n" + "adox r14, rbx;\n" + "mulx r15, rax, [" $P1 "+ 0x18];\n" + "adcx r14, rax;\n" + "adox r15, rcx;\n" + "adcx r15, rcx;\n" + "mov edx, 0x26;\n" + "xor ecx, ecx;\n" + "mulx rbx, rax, r12;\n" + "adcx r8, 
rax;\n" + "adox r9, rbx;\n" + "mulx rbx, rax, r13;\n" + "adcx r9, rax;\n" + "adox r10, rbx;\n" + "mulx rbx, rax, r14;\n" + "adcx r10, rax;\n" + "adox r11, rbx;\n" + "mulx r12, rax, r15;\n" + "adcx r11, rax;\n" + "adox r12, rcx;\n" + "adcx r12, rcx;\n" + "shld r12, r11, 0x1;\n" + "btr r11, 0x3f;\n" + "mov edx, 0x13;\n" + "imul rdx, r12;\n" + "add r8, rdx;\n" + "adc r9, rcx;\n" + "adc r10, rcx;\n" + "adc r11, rcx;\n" + "mov [" $P0 "], r8;\n" + "mov [" $P0 "+ 0x8], r9;\n" + "mov [" $P0 "+ 0x10], r10;\n" + "mov [" $P0 "+ 0x18], r11" + )} +} + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +macro_rules! sub_twice4 { + ($P0:expr, $P1:expr, $P2:expr) => { Q!( + "mov r8, [" $P1 "];\n" + "xor ebx, ebx;\n" + "sub r8, [" $P2 "];\n" + "mov r9, [" $P1 "+ 8];\n" + "sbb r9, [" $P2 "+ 8];\n" + "mov ecx, 38;\n" + "mov r10, [" $P1 "+ 16];\n" + "sbb r10, [" $P2 "+ 16];\n" + "mov rax, [" $P1 "+ 24];\n" + "sbb rax, [" $P2 "+ 24];\n" + "cmovnc rcx, rbx;\n" + "sub r8, rcx;\n" + "sbb r9, rbx;\n" + "sbb r10, rbx;\n" + "sbb rax, rbx;\n" + "mov [" $P0 "], r8;\n" + "mov [" $P0 "+ 8], r9;\n" + "mov [" $P0 "+ 16], r10;\n" + "mov [" $P0 "+ 24], rax" + )} +} + +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. + +macro_rules! 
add_twice4 { + ($P0:expr, $P1:expr, $P2:expr) => { Q!( + "mov r8, [" $P1 "];\n" + "xor ecx, ecx;\n" + "add r8, [" $P2 "];\n" + "mov r9, [" $P1 "+ 0x8];\n" + "adc r9, [" $P2 "+ 0x8];\n" + "mov r10, [" $P1 "+ 0x10];\n" + "adc r10, [" $P2 "+ 0x10];\n" + "mov r11, [" $P1 "+ 0x18];\n" + "adc r11, [" $P2 "+ 0x18];\n" + "mov eax, 38;\n" + "cmovnc rax, rcx;\n" + "add r8, rax;\n" + "adc r9, rcx;\n" + "adc r10, rcx;\n" + "adc r11, rcx;\n" + "mov [" $P0 "], r8;\n" + "mov [" $P0 "+ 0x8], r9;\n" + "mov [" $P0 "+ 0x10], r10;\n" + "mov [" $P0 "+ 0x18], r11" + )} +} + +macro_rules! double_twice4 { + ($P0:expr, $P1:expr) => { Q!( + "mov r8, [" $P1 "];\n" + "xor ecx, ecx;\n" + "add r8, r8;\n" + "mov r9, [" $P1 "+ 0x8];\n" + "adc r9, r9;\n" + "mov r10, [" $P1 "+ 0x10];\n" + "adc r10, r10;\n" + "mov r11, [" $P1 "+ 0x18];\n" + "adc r11, r11;\n" + "mov eax, 38;\n" + "cmovnc rax, rcx;\n" + "add r8, rax;\n" + "adc r9, rcx;\n" + "adc r10, rcx;\n" + "adc r11, rcx;\n" + "mov [" $P0 "], r8;\n" + "mov [" $P0 "+ 0x8], r9;\n" + "mov [" $P0 "+ 0x10], r10;\n" + "mov [" $P0 "+ 0x18], r11" + )} +} + +/// Scalar multiplication for the edwards25519 standard basepoint +/// +/// Input scalar[4]; output res[8] +/// +/// Given a scalar n, returns point (X,Y) = n * B where B = (...,4/5) is +/// the standard basepoint for the edwards25519 (Ed25519) curve. +pub(crate) fn edwards25519_scalarmulbase(res: &mut [u64; 8], scalar: &[u64; 4]) { + // SAFETY: inline assembly. see [crate::low::inline_assembly_safety] for safety info. + unsafe { + core::arch::asm!( + + Q!(" endbr64 " ), + + // In this case the Windows form literally makes a subroutine call. + // This avoids hassle arising from keeping code and data together. + + + + // Save registers, make room for temps, preserve input arguments. 
+ + Q!(" push " "rbx"), + Q!(" push " "rbp"), + Q!(" push " "r12"), + Q!(" push " "r13"), + Q!(" push " "r14"), + Q!(" push " "r15"), + Q!(" sub " "rsp, " NSPACE!()), + + // Move the output pointer to a stable place + + Q!(" mov " res!() ", rdi"), + + // Copy the input scalar x to its local variable while reducing it + // modulo 2^252 + m where m = 27742317777372353535851937790883648493; + // this is the order of the basepoint so this doesn't change the result. + // First do q = floor(x/2^252) and x' = x - q * (2^252 + m), which gives + // an initial result -15 * m <= x' < 2^252 + + Q!(" mov " "r8, [rsi]"), + Q!(" mov " "r9, [rsi + 8]"), + Q!(" mov " "r10, [rsi + 16]"), + Q!(" mov " "r11, [rsi + 24]"), + + Q!(" mov " "rcx, r11"), + Q!(" shr " "rcx, 60"), + + Q!(" mov " "rax, 0x5812631a5cf5d3ed"), + Q!(" mul " "rcx"), + Q!(" mov " "r12, rax"), + Q!(" mov " "r13, rdx"), + Q!(" mov " "rax, 0x14def9dea2f79cd6"), + Q!(" mul " "rcx"), + Q!(" add " "r13, rax"), + Q!(" adc " "rdx, 0"), + Q!(" shl " "rcx, 60"), + + Q!(" sub " "r8, r12"), + Q!(" sbb " "r9, r13"), + Q!(" sbb " "r10, rdx"), + Q!(" sbb " "r11, rcx"), + + // If x' < 0 then just directly negate it; this makes sure the + // reduced argument is strictly 0 <= x' < 2^252, but now we need + // to record (done via bit 255 of the reduced scalar, which is + // ignored in the main loop) when we negated so we can flip + // the end result to compensate. + + Q!(" sbb " "rax, rax"), + + Q!(" xor " "r8, rax"), + Q!(" xor " "r9, rax"), + Q!(" xor " "r10, rax"), + Q!(" xor " "r11, rax"), + + Q!(" neg " "rax"), + Q!(" adc " "r8, 0"), + Q!(" adc " "r9, 0"), + Q!(" adc " "r10, 0"), + Q!(" adc " "r11, 0"), + + Q!(" shl " "rax, 63"), + Q!(" or " "r11, rax"), + + // And before we store the scalar, test and reset bit 251 to + // initialize the main loop just below. 
+ + Q!(" mov " "[rsp + " SCALAR!() "], r8"), + Q!(" mov " "[rsp + " SCALAR!() "+ 8], r9"), + Q!(" mov " "[rsp + " SCALAR!() "+ 16], r10"), + Q!(" btr " "r11, 59"), + Q!(" mov " "[rsp + " SCALAR!() "+ 24], r11"), + + // The main part of the computation is in extended-projective coordinates + // (X,Y,Z,T), representing an affine point on the edwards25519 curve + // (x,y) via x = X/Z, y = Y/Z and x * y = T/Z (so X * Y = T * Z). + // In comments B means the standard basepoint (x,4/5) = + // (0x216....f25d51a,0x6666..666658). + // + // Initialize accumulator "acc" to either 0 or 2^251 * B depending on + // bit 251 of the (reduced) scalar. That leaves bits 0..250 to handle. + + Q!(" lea " "r10, [rip + {edwards25519_scalarmulbase_0g}]"), + Q!(" lea " "r11, [rip + {edwards25519_scalarmulbase_251g}]"), + + Q!(" mov " "rax, [r10]"), + Q!(" mov " "rcx, [r11]"), + Q!(" cmovc " "rax, rcx"), + Q!(" mov " "[rsp + " ACC!() "], rax"), + + Q!(" mov " "rax, [r10 + 8 * 1]"), + Q!(" mov " "rcx, [r11 + 8 * 1]"), + Q!(" cmovc " "rax, rcx"), + Q!(" mov " "[rsp + " ACC!() "+ 8], rax"), + + Q!(" mov " "rax, [r10 + 8 * 2]"), + Q!(" mov " "rcx, [r11 + 8 * 2]"), + Q!(" cmovc " "rax, rcx"), + Q!(" mov " "[rsp + " ACC!() "+ 16], rax"), + + Q!(" mov " "rax, [r10 + 8 * 3]"), + Q!(" mov " "rcx, [r11 + 8 * 3]"), + Q!(" cmovc " "rax, rcx"), + Q!(" mov " "[rsp + " ACC!() "+ 24], rax"), + + Q!(" mov " "rax, [r10 + 8 * 4]"), + Q!(" mov " "rcx, [r11 + 8 * 4]"), + Q!(" cmovc " "rax, rcx"), + Q!(" mov " "[rsp + " ACC!() "+ 32], rax"), + + Q!(" mov " "rax, [r10 + 8 * 5]"), + Q!(" mov " "rcx, [r11 + 8 * 5]"), + Q!(" cmovc " "rax, rcx"), + Q!(" mov " "[rsp + " ACC!() "+ 40], rax"), + + Q!(" mov " "rax, [r10 + 8 * 6]"), + Q!(" mov " "rcx, [r11 + 8 * 6]"), + Q!(" cmovc " "rax, rcx"), + Q!(" mov " "[rsp + " ACC!() "+ 48], rax"), + + Q!(" mov " "rax, [r10 + 8 * 7]"), + Q!(" mov " "rcx, [r11 + 8 * 7]"), + Q!(" cmovc " "rax, rcx"), + Q!(" mov " "[rsp + " ACC!() "+ 56], rax"), + + Q!(" mov " "eax, 1"), + Q!(" mov " 
"[rsp + " ACC!() "+ 64], rax"), + Q!(" mov " "eax, 0"), + Q!(" mov " "[rsp + " ACC!() "+ 72], rax"), + Q!(" mov " "[rsp + " ACC!() "+ 80], rax"), + Q!(" mov " "[rsp + " ACC!() "+ 88], rax"), + + Q!(" mov " "rax, [r10 + 8 * 8]"), + Q!(" mov " "rcx, [r11 + 8 * 8]"), + Q!(" cmovc " "rax, rcx"), + Q!(" mov " "[rsp + " ACC!() "+ 96], rax"), + + Q!(" mov " "rax, [r10 + 8 * 9]"), + Q!(" mov " "rcx, [r11 + 8 * 9]"), + Q!(" cmovc " "rax, rcx"), + Q!(" mov " "[rsp + " ACC!() "+ 104], rax"), + + Q!(" mov " "rax, [r10 + 8 * 10]"), + Q!(" mov " "rcx, [r11 + 8 * 10]"), + Q!(" cmovc " "rax, rcx"), + Q!(" mov " "[rsp + " ACC!() "+ 112], rax"), + + Q!(" mov " "rax, [r10 + 8 * 11]"), + Q!(" mov " "rcx, [r11 + 8 * 11]"), + Q!(" cmovc " "rax, rcx"), + Q!(" mov " "[rsp + " ACC!() "+ 120], rax"), + + // The counter "i" tracks the bit position for which the scalar has + // already been absorbed, starting at 0 and going up in chunks of 4. + // + // The pointer "tab" points at the current block of the table for + // multiples (2^i * j) * B at the current bit position i; 1 <= j <= 8. + // + // The bias is always either 0 and 1 and needs to be added to the + // partially processed scalar implicitly. This is used to absorb 4 bits + // of scalar per iteration from 3-bit table indexing by exploiting + // negation: (16 * h + l) * B = (16 * (h + 1) - (16 - l)) * B is used + // when l >= 9. Note that we can't have any bias left over at the + // end because we made sure bit 251 is clear in the reduced scalar. + + Q!(" mov " i!() ", 0"), + Q!(" lea " "rax, [rip + {edwards25519_scalarmulbase_gtable}]"), + Q!(" mov " tab!() ", rax"), + Q!(" mov " bias!() ", 0"), + + // Start of the main loop, repeated 63 times for i = 4, 8, ..., 252 + + Q!(Label!("edwards25519_scalarmulbase_scalarloop", 2) ":"), + + // Look at the next 4-bit field "bf", adding the previous bias as well. 
+ // Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9, + // setting the bias to 1 for the next iteration in the latter case. + + Q!(" mov " "rax, " i!()), + Q!(" mov " "rcx, rax"), + Q!(" shr " "rax, 6"), + Q!(" mov " "rax, [rsp + 8 * rax]"), + Q!(" shr " "rax, cl"), + Q!(" and " "rax, 15"), + Q!(" add " "rax, " bias!()), + Q!(" mov " bf!() ", rax"), + + Q!(" cmp " bf!() ", 9"), + Q!(" sbb " "rax, rax"), + Q!(" inc " "rax"), + Q!(" mov " bias!() ", rax"), + + Q!(" mov " "rdi, 16"), + Q!(" sub " "rdi, " bf!()), + Q!(" cmp " bias!() ", 0"), + Q!(" cmovz " "rdi, " bf!()), + Q!(" mov " ix!() ", rdi"), + + // Perform constant-time lookup in the table to get element number "ix". + // The table entry for the affine point (x,y) is actually a triple + // (y - x,x + y,2 * d * x * y) to precompute parts of the addition. + // Note that "ix" can be 0, so we set up the appropriate identity first. + + Q!(" mov " "eax, 1"), + Q!(" xor " "ebx, ebx"), + Q!(" xor " "ecx, ecx"), + Q!(" xor " "edx, edx"), + Q!(" mov " "r8d, 1"), + Q!(" xor " "r9d, r9d"), + Q!(" xor " "r10d, r10d"), + Q!(" xor " "r11d, r11d"), + Q!(" xor " "r12d, r12d"), + Q!(" xor " "r13d, r13d"), + Q!(" xor " "r14d, r14d"), + Q!(" xor " "r15d, r15d"), + + Q!(" mov " "rbp, " tab!()), + + Q!(" cmp " ix!() ", 1"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + 
Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " ix!() ", 2"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " ix!() ", 3"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " ix!() ", 4"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + 
Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " ix!() ", 5"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " ix!() ", 6"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " ix!() ", 7"), + Q!(" mov " "rsi, 
[rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " ix!() ", 8"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + + Q!(" add " "rbp, 96"), + Q!(" mov " tab!() ", rbp"), + + // We now have the triple from the table in registers as follows + // + // [rdx;rcx;rbx;rax] = y - x + // [r11;r10;r9;r8] = x + y + // [r15;r14;r13;r12] = 2 * d * x * y + // + // In case bias = 1 we need to negate this. For Edwards curves + // -(x,y) = (-x,y), i.e. we need to negate the x coordinate. + // In this processed encoding, that amounts to swapping the + // first two fields and negating the third. 
+ // + // The optional negation here also pretends bias = 0 whenever + // ix = 0 so that it doesn't need to handle the case of zero + // inputs, since no non-trivial table entries are zero. Note + // that in the zero case the whole negation is trivial, and + // so indeed is the swapping. + + Q!(" cmp " bias!() ", 0"), + + Q!(" mov " "rsi, rax"), + Q!(" cmovnz " "rsi, r8"), + Q!(" cmovnz " "r8, rax"), + Q!(" mov " "[rsp + " TABENT!() "], rsi"), + Q!(" mov " "[rsp + " TABENT!() "+ 32], r8"), + + Q!(" mov " "rsi, rbx"), + Q!(" cmovnz " "rsi, r9"), + Q!(" cmovnz " "r9, rbx"), + Q!(" mov " "[rsp + " TABENT!() "+ 8], rsi"), + Q!(" mov " "[rsp + " TABENT!() "+ 40], r9"), + + Q!(" mov " "rsi, rcx"), + Q!(" cmovnz " "rsi, r10"), + Q!(" cmovnz " "r10, rcx"), + Q!(" mov " "[rsp + " TABENT!() "+ 16], rsi"), + Q!(" mov " "[rsp + " TABENT!() "+ 48], r10"), + + Q!(" mov " "rsi, rdx"), + Q!(" cmovnz " "rsi, r11"), + Q!(" cmovnz " "r11, rdx"), + Q!(" mov " "[rsp + " TABENT!() "+ 24], rsi"), + Q!(" mov " "[rsp + " TABENT!() "+ 56], r11"), + + Q!(" mov " "rax, -19"), + Q!(" mov " "rbx, -1"), + Q!(" mov " "rcx, -1"), + Q!(" mov " "rdx, 0x7fffffffffffffff"), + Q!(" sub " "rax, r12"), + Q!(" sbb " "rbx, r13"), + Q!(" sbb " "rcx, r14"), + Q!(" sbb " "rdx, r15"), + + Q!(" mov " "r8, " ix!()), + Q!(" mov " "r9, " bias!()), + Q!(" test " "r8, r8"), + Q!(" cmovz " "r9, r8"), + Q!(" test " "r9, r9"), + + Q!(" cmovz " "rax, r12"), + Q!(" cmovz " "rbx, r13"), + Q!(" cmovz " "rcx, r14"), + Q!(" cmovz " "rdx, r15"), + Q!(" mov " "[rsp + " TABENT!() "+ 64], rax"), + Q!(" mov " "[rsp + " TABENT!() "+ 72], rbx"), + Q!(" mov " "[rsp + " TABENT!() "+ 80], rcx"), + Q!(" mov " "[rsp + " TABENT!() "+ 88], rdx"), + + // Extended-projective and precomputed mixed addition. 
+ // This is effectively the same as calling the standalone + // function edwards25519_pepadd(acc,acc,tabent), but we + // only retain slightly weaker normalization < 2 * p_25519 + // throughout the inner loop, so the computation is + // slightly different, and faster overall. + + double_twice4!(t0!(), z_1!()), + sub_twice4!(t1!(), y_1!(), x_1!()), + add_twice4!(t2!(), y_1!(), x_1!()), + mul_4!(t3!(), w_1!(), kxy_2!()), + mul_4!(t1!(), t1!(), ymx_2!()), + mul_4!(t2!(), t2!(), xpy_2!()), + sub_twice4!(t4!(), t0!(), t3!()), + add_twice4!(t0!(), t0!(), t3!()), + sub_twice4!(t5!(), t2!(), t1!()), + add_twice4!(t1!(), t2!(), t1!()), + mul_4!(z_3!(), t4!(), t0!()), + mul_4!(x_3!(), t5!(), t4!()), + mul_4!(y_3!(), t0!(), t1!()), + mul_4!(w_3!(), t5!(), t1!()), + + // End of the main loop; move on by 4 bits. + + Q!(" add " i!() ", 4"), + Q!(" cmp " i!() ", 252"), + Q!(" jc " Label!("edwards25519_scalarmulbase_scalarloop", 2, Before)), + + // Insert the optional negation of the projective X coordinate, and + // so by extension the final affine x coordinate x = X/Z and thus + // the point P = (x,y). We only know X < 2 * p_25519, so we do the + // negation as 2 * p_25519 - X to keep it nonnegative. From this + // point on we don't need any normalization of the coordinates + // except for making sure that they fit in 4 digits. 
+ + Q!(" mov " "r8, [rsp + " X3!() "]"), + Q!(" mov " "r9, [rsp + " X3!() "+ 8]"), + Q!(" mov " "r10, [rsp + " X3!() "+ 16]"), + Q!(" mov " "r11, [rsp + " X3!() "+ 24]"), + Q!(" mov " "r12, 0xffffffffffffffda"), + Q!(" sub " "r12, r8"), + Q!(" mov " "r13, 0xffffffffffffffff"), + Q!(" sbb " "r13, r9"), + Q!(" mov " "r14, 0xffffffffffffffff"), + Q!(" sbb " "r14, r10"), + Q!(" mov " "r15, 0xffffffffffffffff"), + Q!(" sbb " "r15, r11"), + Q!(" mov " "rax, [rsp + " SCALAR!() "+ 24]"), + Q!(" bt " "rax, 63"), + Q!(" cmovc " "r8, r12"), + Q!(" cmovc " "r9, r13"), + Q!(" cmovc " "r10, r14"), + Q!(" cmovc " "r11, r15"), + Q!(" mov " "[rsp + " X3!() "], r8"), + Q!(" mov " "[rsp + " X3!() "+ 8], r9"), + Q!(" mov " "[rsp + " X3!() "+ 16], r10"), + Q!(" mov " "[rsp + " X3!() "+ 24], r11"), + + // Now we need to map out of the extended-projective representation + // (X,Y,Z,W) back to the affine form (x,y) = (X/Z,Y/Z). This means + // first calling the modular inverse to get w_3 = 1/z_3. + + Q!(" lea " "rdi, [rsp + " W3!() "]"), + Q!(" lea " "rsi, [rsp + " Z3!() "]"), + + // Inline copy of bignum_inv_p25519, identical except for stripping out + // the prologue and epilogue saving and restoring registers and making + // and reclaiming room on the stack. For more details and explanations see + // "x86/curve25519/bignum_inv_p25519.S". Note that the stack it uses for + // its own temporaries is 208 bytes, so it has no effect on variables + // that are needed in the rest of our computation here: res, x_3, y_3, + // z_3 and w_3. 
+ + Q!(" mov " "[rsp + 0xc0], rdi"), + Q!(" xor " "eax, eax"), + Q!(" lea " "rcx, [rax -0x13]"), + Q!(" not " "rax"), + Q!(" mov " "[rsp], rcx"), + Q!(" mov " "[rsp + 0x8], rax"), + Q!(" mov " "[rsp + 0x10], rax"), + Q!(" btr " "rax, 0x3f"), + Q!(" mov " "[rsp + 0x18], rax"), + Q!(" mov " "rdx, [rsi]"), + Q!(" mov " "rcx, [rsi + 0x8]"), + Q!(" mov " "r8, [rsi + 0x10]"), + Q!(" mov " "r9, [rsi + 0x18]"), + Q!(" mov " "eax, 0x1"), + Q!(" xor " "r10d, r10d"), + Q!(" bts " "r9, 0x3f"), + Q!(" adc " "rax, r10"), + Q!(" imul " "rax, rax, 0x13"), + Q!(" add " "rdx, rax"), + Q!(" adc " "rcx, r10"), + Q!(" adc " "r8, r10"), + Q!(" adc " "r9, r10"), + Q!(" mov " "eax, 0x13"), + Q!(" cmovb " "rax, r10"), + Q!(" sub " "rdx, rax"), + Q!(" sbb " "rcx, r10"), + Q!(" sbb " "r8, r10"), + Q!(" sbb " "r9, r10"), + Q!(" btr " "r9, 0x3f"), + Q!(" mov " "[rsp + 0x20], rdx"), + Q!(" mov " "[rsp + 0x28], rcx"), + Q!(" mov " "[rsp + 0x30], r8"), + Q!(" mov " "[rsp + 0x38], r9"), + Q!(" xor " "eax, eax"), + Q!(" mov " "[rsp + 0x40], rax"), + Q!(" mov " "[rsp + 0x48], rax"), + Q!(" mov " "[rsp + 0x50], rax"), + Q!(" mov " "[rsp + 0x58], rax"), + Q!(" movabs " "rax, 0xa0f99e2375022099"), + Q!(" mov " "[rsp + 0x60], rax"), + Q!(" movabs " "rax, 0xa8c68f3f1d132595"), + Q!(" mov " "[rsp + 0x68], rax"), + Q!(" movabs " "rax, 0x6c6c893805ac5242"), + Q!(" mov " "[rsp + 0x70], rax"), + Q!(" movabs " "rax, 0x276508b241770615"), + Q!(" mov " "[rsp + 0x78], rax"), + Q!(" mov " "QWORD PTR [rsp + 0x90], 0xa"), + Q!(" mov " "QWORD PTR [rsp + 0x98], 0x1"), + Q!(" jmp " Label!("edwards25519_scalarmulbase_midloop", 3, After)), + Q!(Label!("edwards25519_scalarmulbase_inverseloop", 4) ":"), + Q!(" mov " "r9, r8"), + Q!(" sar " "r9, 0x3f"), + Q!(" xor " "r8, r9"), + Q!(" sub " "r8, r9"), + Q!(" mov " "r11, r10"), + Q!(" sar " "r11, 0x3f"), + Q!(" xor " "r10, r11"), + Q!(" sub " "r10, r11"), + Q!(" mov " "r13, r12"), + Q!(" sar " "r13, 0x3f"), + Q!(" xor " "r12, r13"), + Q!(" sub " "r12, r13"), + Q!(" mov " 
"r15, r14"), + Q!(" sar " "r15, 0x3f"), + Q!(" xor " "r14, r15"), + Q!(" sub " "r14, r15"), + Q!(" mov " "rax, r8"), + Q!(" and " "rax, r9"), + Q!(" mov " "rdi, r10"), + Q!(" and " "rdi, r11"), + Q!(" add " "rdi, rax"), + Q!(" mov " "[rsp + 0x80], rdi"), + Q!(" mov " "rax, r12"), + Q!(" and " "rax, r13"), + Q!(" mov " "rsi, r14"), + Q!(" and " "rsi, r15"), + Q!(" add " "rsi, rax"), + Q!(" mov " "[rsp + 0x88], rsi"), + Q!(" xor " "ebx, ebx"), + Q!(" mov " "rax, [rsp]"), + Q!(" xor " "rax, r9"), + Q!(" mul " "r8"), + Q!(" add " "rdi, rax"), + Q!(" adc " "rbx, rdx"), + Q!(" mov " "rax, [rsp + 0x20]"), + Q!(" xor " "rax, r11"), + Q!(" mul " "r10"), + Q!(" add " "rdi, rax"), + Q!(" adc " "rbx, rdx"), + Q!(" xor " "ebp, ebp"), + Q!(" mov " "rax, [rsp]"), + Q!(" xor " "rax, r13"), + Q!(" mul " "r12"), + Q!(" add " "rsi, rax"), + Q!(" adc " "rbp, rdx"), + Q!(" mov " "rax, [rsp + 0x20]"), + Q!(" xor " "rax, r15"), + Q!(" mul " "r14"), + Q!(" add " "rsi, rax"), + Q!(" adc " "rbp, rdx"), + Q!(" xor " "ecx, ecx"), + Q!(" mov " "rax, [rsp + 0x8]"), + Q!(" xor " "rax, r9"), + Q!(" mul " "r8"), + Q!(" add " "rbx, rax"), + Q!(" adc " "rcx, rdx"), + Q!(" mov " "rax, [rsp + 0x28]"), + Q!(" xor " "rax, r11"), + Q!(" mul " "r10"), + Q!(" add " "rbx, rax"), + Q!(" adc " "rcx, rdx"), + Q!(" shrd " "rdi, rbx, 0x3b"), + Q!(" mov " "[rsp], rdi"), + Q!(" xor " "edi, edi"), + Q!(" mov " "rax, [rsp + 0x8]"), + Q!(" xor " "rax, r13"), + Q!(" mul " "r12"), + Q!(" add " "rbp, rax"), + Q!(" adc " "rdi, rdx"), + Q!(" mov " "rax, [rsp + 0x28]"), + Q!(" xor " "rax, r15"), + Q!(" mul " "r14"), + Q!(" add " "rbp, rax"), + Q!(" adc " "rdi, rdx"), + Q!(" shrd " "rsi, rbp, 0x3b"), + Q!(" mov " "[rsp + 0x20], rsi"), + Q!(" xor " "esi, esi"), + Q!(" mov " "rax, [rsp + 0x10]"), + Q!(" xor " "rax, r9"), + Q!(" mul " "r8"), + Q!(" add " "rcx, rax"), + Q!(" adc " "rsi, rdx"), + Q!(" mov " "rax, [rsp + 0x30]"), + Q!(" xor " "rax, r11"), + Q!(" mul " "r10"), + Q!(" add " "rcx, rax"), + Q!(" adc " "rsi, rdx"), + 
Q!(" shrd " "rbx, rcx, 0x3b"), + Q!(" mov " "[rsp + 0x8], rbx"), + Q!(" xor " "ebx, ebx"), + Q!(" mov " "rax, [rsp + 0x10]"), + Q!(" xor " "rax, r13"), + Q!(" mul " "r12"), + Q!(" add " "rdi, rax"), + Q!(" adc " "rbx, rdx"), + Q!(" mov " "rax, [rsp + 0x30]"), + Q!(" xor " "rax, r15"), + Q!(" mul " "r14"), + Q!(" add " "rdi, rax"), + Q!(" adc " "rbx, rdx"), + Q!(" shrd " "rbp, rdi, 0x3b"), + Q!(" mov " "[rsp + 0x28], rbp"), + Q!(" mov " "rax, [rsp + 0x18]"), + Q!(" xor " "rax, r9"), + Q!(" mov " "rbp, rax"), + Q!(" sar " "rbp, 0x3f"), + Q!(" and " "rbp, r8"), + Q!(" neg " "rbp"), + Q!(" mul " "r8"), + Q!(" add " "rsi, rax"), + Q!(" adc " "rbp, rdx"), + Q!(" mov " "rax, [rsp + 0x38]"), + Q!(" xor " "rax, r11"), + Q!(" mov " "rdx, rax"), + Q!(" sar " "rdx, 0x3f"), + Q!(" and " "rdx, r10"), + Q!(" sub " "rbp, rdx"), + Q!(" mul " "r10"), + Q!(" add " "rsi, rax"), + Q!(" adc " "rbp, rdx"), + Q!(" shrd " "rcx, rsi, 0x3b"), + Q!(" mov " "[rsp + 0x10], rcx"), + Q!(" shrd " "rsi, rbp, 0x3b"), + Q!(" mov " "rax, [rsp + 0x18]"), + Q!(" mov " "[rsp + 0x18], rsi"), + Q!(" xor " "rax, r13"), + Q!(" mov " "rsi, rax"), + Q!(" sar " "rsi, 0x3f"), + Q!(" and " "rsi, r12"), + Q!(" neg " "rsi"), + Q!(" mul " "r12"), + Q!(" add " "rbx, rax"), + Q!(" adc " "rsi, rdx"), + Q!(" mov " "rax, [rsp + 0x38]"), + Q!(" xor " "rax, r15"), + Q!(" mov " "rdx, rax"), + Q!(" sar " "rdx, 0x3f"), + Q!(" and " "rdx, r14"), + Q!(" sub " "rsi, rdx"), + Q!(" mul " "r14"), + Q!(" add " "rbx, rax"), + Q!(" adc " "rsi, rdx"), + Q!(" shrd " "rdi, rbx, 0x3b"), + Q!(" mov " "[rsp + 0x30], rdi"), + Q!(" shrd " "rbx, rsi, 0x3b"), + Q!(" mov " "[rsp + 0x38], rbx"), + Q!(" mov " "rbx, [rsp + 0x80]"), + Q!(" mov " "rbp, [rsp + 0x88]"), + Q!(" xor " "ecx, ecx"), + Q!(" mov " "rax, [rsp + 0x40]"), + Q!(" xor " "rax, r9"), + Q!(" mul " "r8"), + Q!(" add " "rbx, rax"), + Q!(" adc " "rcx, rdx"), + Q!(" mov " "rax, [rsp + 0x60]"), + Q!(" xor " "rax, r11"), + Q!(" mul " "r10"), + Q!(" add " "rbx, rax"), + Q!(" adc " "rcx, 
rdx"), + Q!(" xor " "esi, esi"), + Q!(" mov " "rax, [rsp + 0x40]"), + Q!(" xor " "rax, r13"), + Q!(" mul " "r12"), + Q!(" mov " "[rsp + 0x40], rbx"), + Q!(" add " "rbp, rax"), + Q!(" adc " "rsi, rdx"), + Q!(" mov " "rax, [rsp + 0x60]"), + Q!(" xor " "rax, r15"), + Q!(" mul " "r14"), + Q!(" add " "rbp, rax"), + Q!(" adc " "rsi, rdx"), + Q!(" mov " "[rsp + 0x60], rbp"), + Q!(" xor " "ebx, ebx"), + Q!(" mov " "rax, [rsp + 0x48]"), + Q!(" xor " "rax, r9"), + Q!(" mul " "r8"), + Q!(" add " "rcx, rax"), + Q!(" adc " "rbx, rdx"), + Q!(" mov " "rax, [rsp + 0x68]"), + Q!(" xor " "rax, r11"), + Q!(" mul " "r10"), + Q!(" add " "rcx, rax"), + Q!(" adc " "rbx, rdx"), + Q!(" xor " "ebp, ebp"), + Q!(" mov " "rax, [rsp + 0x48]"), + Q!(" xor " "rax, r13"), + Q!(" mul " "r12"), + Q!(" mov " "[rsp + 0x48], rcx"), + Q!(" add " "rsi, rax"), + Q!(" adc " "rbp, rdx"), + Q!(" mov " "rax, [rsp + 0x68]"), + Q!(" xor " "rax, r15"), + Q!(" mul " "r14"), + Q!(" add " "rsi, rax"), + Q!(" adc " "rbp, rdx"), + Q!(" mov " "[rsp + 0x68], rsi"), + Q!(" xor " "ecx, ecx"), + Q!(" mov " "rax, [rsp + 0x50]"), + Q!(" xor " "rax, r9"), + Q!(" mul " "r8"), + Q!(" add " "rbx, rax"), + Q!(" adc " "rcx, rdx"), + Q!(" mov " "rax, [rsp + 0x70]"), + Q!(" xor " "rax, r11"), + Q!(" mul " "r10"), + Q!(" add " "rbx, rax"), + Q!(" adc " "rcx, rdx"), + Q!(" xor " "esi, esi"), + Q!(" mov " "rax, [rsp + 0x50]"), + Q!(" xor " "rax, r13"), + Q!(" mul " "r12"), + Q!(" mov " "[rsp + 0x50], rbx"), + Q!(" add " "rbp, rax"), + Q!(" adc " "rsi, rdx"), + Q!(" mov " "rax, [rsp + 0x70]"), + Q!(" xor " "rax, r15"), + Q!(" mul " "r14"), + Q!(" add " "rbp, rax"), + Q!(" adc " "rsi, rdx"), + Q!(" mov " "[rsp + 0x70], rbp"), + Q!(" mov " "rax, [rsp + 0x58]"), + Q!(" xor " "rax, r9"), + Q!(" mov " "rbx, r9"), + Q!(" and " "rbx, r8"), + Q!(" neg " "rbx"), + Q!(" mul " "r8"), + Q!(" add " "rcx, rax"), + Q!(" adc " "rbx, rdx"), + Q!(" mov " "rax, [rsp + 0x78]"), + Q!(" xor " "rax, r11"), + Q!(" mov " "rdx, r11"), + Q!(" and " "rdx, r10"), 
+ Q!(" sub " "rbx, rdx"), + Q!(" mul " "r10"), + Q!(" add " "rcx, rax"), + Q!(" adc " "rdx, rbx"), + Q!(" mov " "rbx, rdx"), + Q!(" shld " "rdx, rcx, 0x1"), + Q!(" sar " "rbx, 0x3f"), + Q!(" add " "rdx, rbx"), + Q!(" mov " "eax, 0x13"), + Q!(" imul " "rdx"), + Q!(" mov " "r8, [rsp + 0x40]"), + Q!(" add " "r8, rax"), + Q!(" mov " "[rsp + 0x40], r8"), + Q!(" mov " "r8, [rsp + 0x48]"), + Q!(" adc " "r8, rdx"), + Q!(" mov " "[rsp + 0x48], r8"), + Q!(" mov " "r8, [rsp + 0x50]"), + Q!(" adc " "r8, rbx"), + Q!(" mov " "[rsp + 0x50], r8"), + Q!(" adc " "rcx, rbx"), + Q!(" shl " "rax, 0x3f"), + Q!(" add " "rcx, rax"), + Q!(" mov " "rax, [rsp + 0x58]"), + Q!(" mov " "[rsp + 0x58], rcx"), + Q!(" xor " "rax, r13"), + Q!(" mov " "rcx, r13"), + Q!(" and " "rcx, r12"), + Q!(" neg " "rcx"), + Q!(" mul " "r12"), + Q!(" add " "rsi, rax"), + Q!(" adc " "rcx, rdx"), + Q!(" mov " "rax, [rsp + 0x78]"), + Q!(" xor " "rax, r15"), + Q!(" mov " "rdx, r15"), + Q!(" and " "rdx, r14"), + Q!(" sub " "rcx, rdx"), + Q!(" mul " "r14"), + Q!(" add " "rsi, rax"), + Q!(" adc " "rdx, rcx"), + Q!(" mov " "rcx, rdx"), + Q!(" shld " "rdx, rsi, 0x1"), + Q!(" sar " "rcx, 0x3f"), + Q!(" mov " "eax, 0x13"), + Q!(" add " "rdx, rcx"), + Q!(" imul " "rdx"), + Q!(" mov " "r8, [rsp + 0x60]"), + Q!(" add " "r8, rax"), + Q!(" mov " "[rsp + 0x60], r8"), + Q!(" mov " "r8, [rsp + 0x68]"), + Q!(" adc " "r8, rdx"), + Q!(" mov " "[rsp + 0x68], r8"), + Q!(" mov " "r8, [rsp + 0x70]"), + Q!(" adc " "r8, rcx"), + Q!(" mov " "[rsp + 0x70], r8"), + Q!(" adc " "rsi, rcx"), + Q!(" shl " "rax, 0x3f"), + Q!(" add " "rsi, rax"), + Q!(" mov " "[rsp + 0x78], rsi"), + Q!(Label!("edwards25519_scalarmulbase_midloop", 3) ":"), + Q!(" mov " "rsi, [rsp + 0x98]"), + Q!(" mov " "rdx, [rsp]"), + Q!(" mov " "rcx, [rsp + 0x20]"), + Q!(" mov " "rbx, rdx"), + Q!(" and " "rbx, 0xfffff"), + Q!(" movabs " "rax, 0xfffffe0000000000"), + Q!(" or " "rbx, rax"), + Q!(" and " "rcx, 0xfffff"), + Q!(" movabs " "rax, 0xc000000000000000"), + Q!(" or " "rcx, 
rax"), + Q!(" mov " "rax, 0xfffffffffffffffe"), + Q!(" xor " "ebp, ebp"), + Q!(" mov " "edx, 0x2"), + Q!(" mov " "rdi, rbx"), + Q!(" mov " "r8, rax"), + Q!(" test " "rsi, rsi"), + Q!(" cmovs " "r8, rbp"), + Q!(" test " "rcx, 0x1"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + 
Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + 
Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" 
lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" sar " "rcx, 1"), + Q!(" mov " "eax, 0x100000"), + Q!(" lea " "rdx, [rbx + rax]"), + Q!(" lea " "rdi, [rcx + rax]"), + Q!(" shl " "rdx, 0x16"), + Q!(" shl " "rdi, 0x16"), + Q!(" sar " "rdx, 0x2b"), + Q!(" sar " "rdi, 0x2b"), + Q!(" movabs " "rax, 0x20000100000"), + Q!(" lea " "rbx, [rbx + rax]"), + Q!(" lea " "rcx, [rcx + rax]"), + Q!(" sar " "rbx, 0x2a"), + Q!(" sar " "rcx, 0x2a"), + Q!(" mov " "[rsp + 0xa0], rdx"), + Q!(" mov " "[rsp + 0xa8], rbx"), + Q!(" mov " "[rsp + 0xb0], rdi"), + Q!(" mov " "[rsp + 0xb8], rcx"), + Q!(" mov " "r12, [rsp]"), + Q!(" imul " "rdi, r12"), + Q!(" imul " "r12, rdx"), + Q!(" mov " "r13, [rsp + 0x20]"), + Q!(" imul " "rbx, r13"), + Q!(" imul " "r13, rcx"), + Q!(" add " "r12, rbx"), + Q!(" add " "r13, rdi"), + Q!(" sar " "r12, 0x14"), + Q!(" sar " "r13, 0x14"), + Q!(" mov " "rbx, r12"), + Q!(" and " "rbx, 0xfffff"), + Q!(" movabs " "rax, 0xfffffe0000000000"), + Q!(" or " "rbx, 
rax"), + Q!(" mov " "rcx, r13"), + Q!(" and " "rcx, 0xfffff"), + Q!(" movabs " "rax, 0xc000000000000000"), + Q!(" or " "rcx, rax"), + Q!(" mov " "rax, 0xfffffffffffffffe"), + Q!(" mov " "edx, 0x2"), + Q!(" mov " "rdi, rbx"), + Q!(" mov " "r8, rax"), + Q!(" test " "rsi, rsi"), + Q!(" cmovs " "r8, rbp"), + Q!(" test " "rcx, 0x1"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov 
" "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " 
"rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" 
bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" sar " "rcx, 1"), + Q!(" mov " "eax, 0x100000"), + Q!(" lea " "r8, [rbx + rax]"), + Q!(" lea " "r10, [rcx + rax]"), + Q!(" shl " "r8, 0x16"), + Q!(" shl " "r10, 0x16"), + Q!(" sar " "r8, 0x2b"), + Q!(" sar " "r10, 0x2b"), + Q!(" movabs " "rax, 0x20000100000"), + Q!(" lea " "r15, [rbx + rax]"), + Q!(" lea " "r11, [rcx + rax]"), + Q!(" sar " "r15, 0x2a"), + Q!(" sar " "r11, 0x2a"), + Q!(" mov " "rbx, r13"), + Q!(" mov " "rcx, r12"), + Q!(" imul " "r12, r8"), + Q!(" imul " "rbx, r15"), + Q!(" add " "r12, rbx"), + Q!(" imul " "r13, r11"), + Q!(" imul " "rcx, r10"), + Q!(" add " "r13, rcx"), + Q!(" sar " "r12, 0x14"), + Q!(" sar " "r13, 0x14"), + Q!(" mov " "rbx, r12"), + Q!(" and " "rbx, 0xfffff"), + Q!(" movabs " "rax, 0xfffffe0000000000"), + Q!(" or " "rbx, rax"), + Q!(" mov " "rcx, r13"), + Q!(" and " "rcx, 
0xfffff"), + Q!(" movabs " "rax, 0xc000000000000000"), + Q!(" or " "rcx, rax"), + Q!(" mov " "rax, [rsp + 0xa0]"), + Q!(" imul " "rax, r8"), + Q!(" mov " "rdx, [rsp + 0xb0]"), + Q!(" imul " "rdx, r15"), + Q!(" imul " "r8, [rsp + 0xa8]"), + Q!(" imul " "r15, [rsp + 0xb8]"), + Q!(" add " "r15, r8"), + Q!(" lea " "r9, [rax + rdx]"), + Q!(" mov " "rax, [rsp + 0xa0]"), + Q!(" imul " "rax, r10"), + Q!(" mov " "rdx, [rsp + 0xb0]"), + Q!(" imul " "rdx, r11"), + Q!(" imul " "r10, [rsp + 0xa8]"), + Q!(" imul " "r11, [rsp + 0xb8]"), + Q!(" add " "r11, r10"), + Q!(" lea " "r13, [rax + rdx]"), + Q!(" mov " "rax, 0xfffffffffffffffe"), + Q!(" mov " "edx, 0x2"), + Q!(" mov " "rdi, rbx"), + Q!(" mov " "r8, rax"), + Q!(" test " "rsi, rsi"), + Q!(" cmovs " "r8, rbp"), + Q!(" test " "rcx, 0x1"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " 
"r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" 
xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + 
Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" sar " "rcx, 1"), + Q!(" mov " "eax, 0x100000"), + Q!(" lea " "r8, [rbx + rax]"), + Q!(" lea " "r12, [rcx + rax]"), + Q!(" shl " "r8, 0x15"), + Q!(" shl " "r12, 0x15"), + Q!(" sar " "r8, 0x2b"), + Q!(" sar " "r12, 0x2b"), + Q!(" movabs " "rax, 0x20000100000"), + Q!(" lea " "r10, [rbx + rax]"), + Q!(" lea " "r14, [rcx + rax]"), + Q!(" sar " "r10, 0x2b"), + Q!(" sar " "r14, 0x2b"), + Q!(" mov " "rax, r9"), + Q!(" imul " "rax, r8"), + Q!(" mov " "rdx, r13"), + Q!(" imul " "rdx, r10"), + Q!(" imul " "r8, r15"), + Q!(" imul " "r10, r11"), + Q!(" add " "r10, r8"), + Q!(" lea " "r8, [rax + rdx]"), + Q!(" mov " "rax, r9"), + Q!(" imul " "rax, r12"), + Q!(" mov " "rdx, r13"), + Q!(" imul " "rdx, r14"), + Q!(" imul 
" "r12, r15"), + Q!(" imul " "r14, r11"), + Q!(" add " "r14, r12"), + Q!(" lea " "r12, [rax + rdx]"), + Q!(" mov " "[rsp + 0x98], rsi"), + Q!(" dec " "QWORD PTR [rsp + 0x90]"), + Q!(" jne " Label!("edwards25519_scalarmulbase_inverseloop", 4, Before)), + Q!(" mov " "rax, [rsp]"), + Q!(" mov " "rcx, [rsp + 0x20]"), + Q!(" imul " "rax, r8"), + Q!(" imul " "rcx, r10"), + Q!(" add " "rax, rcx"), + Q!(" sar " "rax, 0x3f"), + Q!(" mov " "r9, r8"), + Q!(" sar " "r9, 0x3f"), + Q!(" xor " "r8, r9"), + Q!(" sub " "r8, r9"), + Q!(" xor " "r9, rax"), + Q!(" mov " "r11, r10"), + Q!(" sar " "r11, 0x3f"), + Q!(" xor " "r10, r11"), + Q!(" sub " "r10, r11"), + Q!(" xor " "r11, rax"), + Q!(" mov " "r13, r12"), + Q!(" sar " "r13, 0x3f"), + Q!(" xor " "r12, r13"), + Q!(" sub " "r12, r13"), + Q!(" xor " "r13, rax"), + Q!(" mov " "r15, r14"), + Q!(" sar " "r15, 0x3f"), + Q!(" xor " "r14, r15"), + Q!(" sub " "r14, r15"), + Q!(" xor " "r15, rax"), + Q!(" mov " "rax, r8"), + Q!(" and " "rax, r9"), + Q!(" mov " "r12, r10"), + Q!(" and " "r12, r11"), + Q!(" add " "r12, rax"), + Q!(" xor " "r13d, r13d"), + Q!(" mov " "rax, [rsp + 0x40]"), + Q!(" xor " "rax, r9"), + Q!(" mul " "r8"), + Q!(" add " "r12, rax"), + Q!(" adc " "r13, rdx"), + Q!(" mov " "rax, [rsp + 0x60]"), + Q!(" xor " "rax, r11"), + Q!(" mul " "r10"), + Q!(" add " "r12, rax"), + Q!(" adc " "r13, rdx"), + Q!(" xor " "r14d, r14d"), + Q!(" mov " "rax, [rsp + 0x48]"), + Q!(" xor " "rax, r9"), + Q!(" mul " "r8"), + Q!(" add " "r13, rax"), + Q!(" adc " "r14, rdx"), + Q!(" mov " "rax, [rsp + 0x68]"), + Q!(" xor " "rax, r11"), + Q!(" mul " "r10"), + Q!(" add " "r13, rax"), + Q!(" adc " "r14, rdx"), + Q!(" xor " "r15d, r15d"), + Q!(" mov " "rax, [rsp + 0x50]"), + Q!(" xor " "rax, r9"), + Q!(" mul " "r8"), + Q!(" add " "r14, rax"), + Q!(" adc " "r15, rdx"), + Q!(" mov " "rax, [rsp + 0x70]"), + Q!(" xor " "rax, r11"), + Q!(" mul " "r10"), + Q!(" add " "r14, rax"), + Q!(" adc " "r15, rdx"), + Q!(" mov " "rax, [rsp + 0x58]"), + Q!(" xor " 
"rax, r9"), + Q!(" and " "r9, r8"), + Q!(" neg " "r9"), + Q!(" mul " "r8"), + Q!(" add " "r15, rax"), + Q!(" adc " "r9, rdx"), + Q!(" mov " "rax, [rsp + 0x78]"), + Q!(" xor " "rax, r11"), + Q!(" mov " "rdx, r11"), + Q!(" and " "rdx, r10"), + Q!(" sub " "r9, rdx"), + Q!(" mul " "r10"), + Q!(" add " "r15, rax"), + Q!(" adc " "r9, rdx"), + Q!(" mov " "rax, r9"), + Q!(" shld " "rax, r15, 0x1"), + Q!(" sar " "r9, 0x3f"), + Q!(" mov " "ebx, 0x13"), + Q!(" lea " "rax, [rax + r9 + 0x1]"), + Q!(" imul " "rbx"), + Q!(" xor " "ebp, ebp"), + Q!(" add " "r12, rax"), + Q!(" adc " "r13, rdx"), + Q!(" adc " "r14, r9"), + Q!(" adc " "r15, r9"), + Q!(" shl " "rax, 0x3f"), + Q!(" add " "r15, rax"), + Q!(" cmovns " "rbx, rbp"), + Q!(" sub " "r12, rbx"), + Q!(" sbb " "r13, rbp"), + Q!(" sbb " "r14, rbp"), + Q!(" sbb " "r15, rbp"), + Q!(" btr " "r15, 0x3f"), + Q!(" mov " "rdi, [rsp + 0xc0]"), + Q!(" mov " "[rdi], r12"), + Q!(" mov " "[rdi + 0x8], r13"), + Q!(" mov " "[rdi + 0x10], r14"), + Q!(" mov " "[rdi + 0x18], r15"), + + // The final result is x = X * inv(Z), y = Y * inv(Z). + // These are the only operations in the whole computation that + // fully reduce modulo p_25519 since now we want the canonical + // answer as output. 
+ + Q!(" mov " "rbp, " res!()), + mul_p25519!(resx!(), x_3!(), w_3!()), + mul_p25519!(resy!(), y_3!(), w_3!()), + + // Restore stack and registers + + Q!(" add " "rsp, " NSPACE!()), + + Q!(" pop " "r15"), + Q!(" pop " "r14"), + Q!(" pop " "r13"), + Q!(" pop " "r12"), + Q!(" pop " "rbp"), + Q!(" pop " "rbx"), + inout("rdi") res.as_mut_ptr() => _, + inout("rsi") scalar.as_ptr() => _, + edwards25519_scalarmulbase_0g = sym edwards25519_scalarmulbase_0g, + edwards25519_scalarmulbase_251g = sym edwards25519_scalarmulbase_251g, + edwards25519_scalarmulbase_gtable = sym edwards25519_scalarmulbase_gtable, + // clobbers + out("r10") _, + out("r11") _, + out("r12") _, + out("r13") _, + out("r14") _, + out("r15") _, + out("r8") _, + out("r9") _, + out("rax") _, + out("rcx") _, + out("rdx") _, + ) + }; +} + +// **************************************************************************** +// The precomputed data (all read-only). This is currently part of the same +// text section, which gives position-independent code with simple PC-relative +// addressing. However it could be put in a separate section via something like +// +// .section .rodata +// **************************************************************************** + +// 0 * B = 0 and 2^251 * B in extended-projective coordinates +// but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. 
+ +static edwards25519_scalarmulbase_0g: [u64; 12] = [ + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000001, + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, +]; + +static edwards25519_scalarmulbase_251g: [u64; 12] = [ + 0x525f946d7c7220e7, + 0x4636b0b2f1e35444, + 0x796e9d70e892ae0f, + 0x03dec05fa937adb1, + 0x6d1c271cc6375515, + 0x462588c4a4ca4f14, + 0x691129fee55afc39, + 0x15949f784d8472f5, + 0xbd89e510afad0049, + 0x4d1f08c073b9860e, + 0x07716e8b2d00af9d, + 0x70d685f68f859714, + // Precomputed table of multiples of generator for edwards25519 + // all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. +]; + +static edwards25519_scalarmulbase_gtable: [u64; 6048] = [ + // 2^0 * 1 * G + 0x9d103905d740913e, + 0xfd399f05d140beb3, + 0xa5c18434688f8a09, + 0x44fd2f9298f81267, + 0x2fbc93c6f58c3b85, + 0xcf932dc6fb8c0e19, + 0x270b4898643d42c2, + 0x07cf9d3a33d4ba65, + 0xabc91205877aaa68, + 0x26d9e823ccaac49e, + 0x5a1b7dcbdd43598c, + 0x6f117b689f0c65a8, + // 2^0 * 2 * G + 0x8a99a56042b4d5a8, + 0x8f2b810c4e60acf6, + 0xe09e236bb16e37aa, + 0x6bb595a669c92555, + 0x9224e7fc933c71d7, + 0x9f469d967a0ff5b5, + 0x5aa69a65e1d60702, + 0x590c063fa87d2e2e, + 0x43faa8b3a59b7a5f, + 0x36c16bdd5d9acf78, + 0x500fa0840b3d6a31, + 0x701af5b13ea50b73, + // 2^0 * 3 * G + 0x56611fe8a4fcd265, + 0x3bd353fde5c1ba7d, + 0x8131f31a214bd6bd, + 0x2ab91587555bda62, + 0xaf25b0a84cee9730, + 0x025a8430e8864b8a, + 0xc11b50029f016732, + 0x7a164e1b9a80f8f4, + 0x14ae933f0dd0d889, + 0x589423221c35da62, + 0xd170e5458cf2db4c, + 0x5a2826af12b9b4c6, + // 2^0 * 4 * G + 0x95fe050a056818bf, + 0x327e89715660faa9, + 0xc3e8e3cd06a05073, + 0x27933f4c7445a49a, + 0x287351b98efc099f, + 0x6765c6f47dfd2538, + 0xca348d3dfb0a9265, + 0x680e910321e58727, + 0x5a13fbe9c476ff09, + 0x6e9e39457b5cc172, + 0x5ddbdcf9102b4494, + 0x7f9d0cbf63553e2b, + // 2^0 * 5 * G + 
0x7f9182c3a447d6ba, + 0xd50014d14b2729b7, + 0xe33cf11cb864a087, + 0x154a7e73eb1b55f3, + 0xa212bc4408a5bb33, + 0x8d5048c3c75eed02, + 0xdd1beb0c5abfec44, + 0x2945ccf146e206eb, + 0xbcbbdbf1812a8285, + 0x270e0807d0bdd1fc, + 0xb41b670b1bbda72d, + 0x43aabe696b3bb69a, + // 2^0 * 6 * G + 0x499806b67b7d8ca4, + 0x575be28427d22739, + 0xbb085ce7204553b9, + 0x38b64c41ae417884, + 0x3a0ceeeb77157131, + 0x9b27158900c8af88, + 0x8065b668da59a736, + 0x51e57bb6a2cc38bd, + 0x85ac326702ea4b71, + 0xbe70e00341a1bb01, + 0x53e4a24b083bc144, + 0x10b8e91a9f0d61e3, + // 2^0 * 7 * G + 0xba6f2c9aaa3221b1, + 0x6ca021533bba23a7, + 0x9dea764f92192c3a, + 0x1d6edd5d2e5317e0, + 0x6b1a5cd0944ea3bf, + 0x7470353ab39dc0d2, + 0x71b2528228542e49, + 0x461bea69283c927e, + 0xf1836dc801b8b3a2, + 0xb3035f47053ea49a, + 0x529c41ba5877adf3, + 0x7a9fbb1c6a0f90a7, + // 2^0 * 8 * G + 0xe2a75dedf39234d9, + 0x963d7680e1b558f9, + 0x2c2741ac6e3c23fb, + 0x3a9024a1320e01c3, + 0x59b7596604dd3e8f, + 0x6cb30377e288702c, + 0xb1339c665ed9c323, + 0x0915e76061bce52f, + 0xe7c1f5d9c9a2911a, + 0xb8a371788bcca7d7, + 0x636412190eb62a32, + 0x26907c5c2ecc4e95, + // 2^4 * 1 * G + 0x7ec851ca553e2df3, + 0xa71284cba64878b3, + 0xe6b5e4193288d1e7, + 0x4cf210ec5a9a8883, + 0x322d04a52d9021f6, + 0xb9c19f3375c6bf9c, + 0x587a3a4342d20b09, + 0x143b1cf8aa64fe61, + 0x9f867c7d968acaab, + 0x5f54258e27092729, + 0xd0a7d34bea180975, + 0x21b546a3374126e1, + // 2^4 * 2 * G + 0xa94ff858a2888343, + 0xce0ed4565313ed3c, + 0xf55c3dcfb5bf34fa, + 0x0a653ca5c9eab371, + 0x490a7a45d185218f, + 0x9a15377846049335, + 0x0060ea09cc31e1f6, + 0x7e041577f86ee965, + 0x66b2a496ce5b67f3, + 0xff5492d8bd569796, + 0x503cec294a592cd0, + 0x566943650813acb2, + // 2^4 * 3 * G + 0xb818db0c26620798, + 0x5d5c31d9606e354a, + 0x0982fa4f00a8cdc7, + 0x17e12bcd4653e2d4, + 0x5672f9eb1dabb69d, + 0xba70b535afe853fc, + 0x47ac0f752796d66d, + 0x32a5351794117275, + 0xd3a644a6df648437, + 0x703b6559880fbfdd, + 0xcb852540ad3a1aa5, + 0x0900b3f78e4c6468, + // 2^4 * 4 * G + 0x0a851b9f679d651b, + 
0xe108cb61033342f2, + 0xd601f57fe88b30a3, + 0x371f3acaed2dd714, + 0xed280fbec816ad31, + 0x52d9595bd8e6efe3, + 0x0fe71772f6c623f5, + 0x4314030b051e293c, + 0xd560005efbf0bcad, + 0x8eb70f2ed1870c5e, + 0x201f9033d084e6a0, + 0x4c3a5ae1ce7b6670, + // 2^4 * 5 * G + 0x4138a434dcb8fa95, + 0x870cf67d6c96840b, + 0xde388574297be82c, + 0x7c814db27262a55a, + 0xbaf875e4c93da0dd, + 0xb93282a771b9294d, + 0x80d63fb7f4c6c460, + 0x6de9c73dea66c181, + 0x478904d5a04df8f2, + 0xfafbae4ab10142d3, + 0xf6c8ac63555d0998, + 0x5aac4a412f90b104, + // 2^4 * 6 * G + 0xc64f326b3ac92908, + 0x5551b282e663e1e0, + 0x476b35f54a1a4b83, + 0x1b9da3fe189f68c2, + 0x603a0d0abd7f5134, + 0x8089c932e1d3ae46, + 0xdf2591398798bd63, + 0x1c145cd274ba0235, + 0x32e8386475f3d743, + 0x365b8baf6ae5d9ef, + 0x825238b6385b681e, + 0x234929c1167d65e1, + // 2^4 * 7 * G + 0x984decaba077ade8, + 0x383f77ad19eb389d, + 0xc7ec6b7e2954d794, + 0x59c77b3aeb7c3a7a, + 0x48145cc21d099fcf, + 0x4535c192cc28d7e5, + 0x80e7c1e548247e01, + 0x4a5f28743b2973ee, + 0xd3add725225ccf62, + 0x911a3381b2152c5d, + 0xd8b39fad5b08f87d, + 0x6f05606b4799fe3b, + // 2^4 * 8 * G + 0x9ffe9e92177ba962, + 0x98aee71d0de5cae1, + 0x3ff4ae942d831044, + 0x714de12e58533ac8, + 0x5b433149f91b6483, + 0xadb5dc655a2cbf62, + 0x87fa8412632827b3, + 0x60895e91ab49f8d8, + 0xe9ecf2ed0cf86c18, + 0xb46d06120735dfd4, + 0xbc9da09804b96be7, + 0x73e2e62fd96dc26b, + // 2^8 * 1 * G + 0xed5b635449aa515e, + 0xa865c49f0bc6823a, + 0x850c1fe95b42d1c4, + 0x30d76d6f03d315b9, + 0x2eccdd0e632f9c1d, + 0x51d0b69676893115, + 0x52dfb76ba8637a58, + 0x6dd37d49a00eef39, + 0x6c4444172106e4c7, + 0xfb53d680928d7f69, + 0xb4739ea4694d3f26, + 0x10c697112e864bb0, + // 2^8 * 2 * G + 0x6493c4277dbe5fde, + 0x265d4fad19ad7ea2, + 0x0e00dfc846304590, + 0x25e61cabed66fe09, + 0x0ca62aa08358c805, + 0x6a3d4ae37a204247, + 0x7464d3a63b11eddc, + 0x03bf9baf550806ef, + 0x3f13e128cc586604, + 0x6f5873ecb459747e, + 0xa0b63dedcc1268f5, + 0x566d78634586e22c, + // 2^8 * 3 * G + 0x1637a49f9cc10834, + 0xbc8e56d5a89bc451, + 
0x1cb5ec0f7f7fd2db, + 0x33975bca5ecc35d9, + 0xa1054285c65a2fd0, + 0x6c64112af31667c3, + 0x680ae240731aee58, + 0x14fba5f34793b22a, + 0x3cd746166985f7d4, + 0x593e5e84c9c80057, + 0x2fc3f2b67b61131e, + 0x14829cea83fc526c, + // 2^8 * 4 * G + 0xff437b8497dd95c2, + 0x6c744e30aa4eb5a7, + 0x9e0c5d613c85e88b, + 0x2fd9c71e5f758173, + 0x21e70b2f4e71ecb8, + 0xe656ddb940a477e3, + 0xbf6556cece1d4f80, + 0x05fc3bc4535d7b7e, + 0x24b8b3ae52afdedd, + 0x3495638ced3b30cf, + 0x33a4bc83a9be8195, + 0x373767475c651f04, + // 2^8 * 5 * G + 0x2fba99fd40d1add9, + 0xb307166f96f4d027, + 0x4363f05215f03bae, + 0x1fbea56c3b18f999, + 0x634095cb14246590, + 0xef12144016c15535, + 0x9e38140c8910bc60, + 0x6bf5905730907c8c, + 0x0fa778f1e1415b8a, + 0x06409ff7bac3a77e, + 0x6f52d7b89aa29a50, + 0x02521cf67a635a56, + // 2^8 * 6 * G + 0x513fee0b0a9d5294, + 0x8f98e75c0fdf5a66, + 0xd4618688bfe107ce, + 0x3fa00a7e71382ced, + 0xb1146720772f5ee4, + 0xe8f894b196079ace, + 0x4af8224d00ac824a, + 0x001753d9f7cd6cc4, + 0x3c69232d963ddb34, + 0x1dde87dab4973858, + 0xaad7d1f9a091f285, + 0x12b5fe2fa048edb6, + // 2^8 * 7 * G + 0x71f0fbc496fce34d, + 0x73b9826badf35bed, + 0xd2047261ff28c561, + 0x749b76f96fb1206f, + 0xdf2b7c26ad6f1e92, + 0x4b66d323504b8913, + 0x8c409dc0751c8bc3, + 0x6f7e93c20796c7b8, + 0x1f5af604aea6ae05, + 0xc12351f1bee49c99, + 0x61a808b5eeff6b66, + 0x0fcec10f01e02151, + // 2^8 * 8 * G + 0x644d58a649fe1e44, + 0x21fcaea231ad777e, + 0x02441c5a887fd0d2, + 0x4901aa7183c511f3, + 0x3df2d29dc4244e45, + 0x2b020e7493d8de0a, + 0x6cc8067e820c214d, + 0x413779166feab90a, + 0x08b1b7548c1af8f0, + 0xce0f7a7c246299b4, + 0xf760b0f91e06d939, + 0x41bb887b726d1213, + // 2^12 * 1 * G + 0x9267806c567c49d8, + 0x066d04ccca791e6a, + 0xa69f5645e3cc394b, + 0x5c95b686a0788cd2, + 0x97d980e0aa39f7d2, + 0x35d0384252c6b51c, + 0x7d43f49307cd55aa, + 0x56bd36cfb78ac362, + 0x2ac519c10d14a954, + 0xeaf474b494b5fa90, + 0xe6af8382a9f87a5a, + 0x0dea6db1879be094, + // 2^12 * 2 * G + 0xaa66bf547344e5ab, + 0xda1258888f1b4309, + 0x5e87d2b3fd564b2f, + 
0x5b2c78885483b1dd, + 0x15baeb74d6a8797a, + 0x7ef55cf1fac41732, + 0x29001f5a3c8b05c5, + 0x0ad7cc8752eaccfb, + 0x52151362793408cf, + 0xeb0f170319963d94, + 0xa833b2fa883d9466, + 0x093a7fa775003c78, + // 2^12 * 3 * G + 0xe5107de63a16d7be, + 0xa377ffdc9af332cf, + 0x70d5bf18440b677f, + 0x6a252b19a4a31403, + 0xb8e9604460a91286, + 0x7f3fd8047778d3de, + 0x67d01e31bf8a5e2d, + 0x7b038a06c27b653e, + 0x9ed919d5d36990f3, + 0x5213aebbdb4eb9f2, + 0xc708ea054cb99135, + 0x58ded57f72260e56, + // 2^12 * 4 * G + 0x78e79dade9413d77, + 0xf257f9d59729e67d, + 0x59db910ee37aa7e6, + 0x6aa11b5bbb9e039c, + 0xda6d53265b0fd48b, + 0x8960823193bfa988, + 0xd78ac93261d57e28, + 0x79f2942d3a5c8143, + 0x97da2f25b6c88de9, + 0x251ba7eaacf20169, + 0x09b44f87ef4eb4e4, + 0x7d90ab1bbc6a7da5, + // 2^12 * 5 * G + 0x9acca683a7016bfe, + 0x90505f4df2c50b6d, + 0x6b610d5fcce435aa, + 0x19a10d446198ff96, + 0x1a07a3f496b3c397, + 0x11ceaa188f4e2532, + 0x7d9498d5a7751bf0, + 0x19ed161f508dd8a0, + 0x560a2cd687dce6ca, + 0x7f3568c48664cf4d, + 0x8741e95222803a38, + 0x483bdab1595653fc, + // 2^12 * 6 * G + 0xfa780f148734fa49, + 0x106f0b70360534e0, + 0x2210776fe3e307bd, + 0x3286c109dde6a0fe, + 0xd6cf4d0ab4da80f6, + 0x82483e45f8307fe0, + 0x05005269ae6f9da4, + 0x1c7052909cf7877a, + 0x32ee7de2874e98d4, + 0x14c362e9b97e0c60, + 0x5781dcde6a60a38a, + 0x217dd5eaaa7aa840, + // 2^12 * 7 * G + 0x9db7c4d0248e1eb0, + 0xe07697e14d74bf52, + 0x1e6a9b173c562354, + 0x7fa7c21f795a4965, + 0x8bdf1fb9be8c0ec8, + 0x00bae7f8e30a0282, + 0x4963991dad6c4f6c, + 0x07058a6e5df6f60a, + 0xe9eb02c4db31f67f, + 0xed25fd8910bcfb2b, + 0x46c8131f5c5cddb4, + 0x33b21c13a0cb9bce, + // 2^12 * 8 * G + 0x360692f8087d8e31, + 0xf4dcc637d27163f7, + 0x25a4e62065ea5963, + 0x659bf72e5ac160d9, + 0x9aafb9b05ee38c5b, + 0xbf9d2d4e071a13c7, + 0x8eee6e6de933290a, + 0x1c3bab17ae109717, + 0x1c9ab216c7cab7b0, + 0x7d65d37407bbc3cc, + 0x52744750504a58d5, + 0x09f2606b131a2990, + // 2^16 * 1 * G + 0x40e87d44744346be, + 0x1d48dad415b52b25, + 0x7c3a8a18a13b603e, + 0x4eb728c12fcdbdf7, + 
0x7e234c597c6691ae, + 0x64889d3d0a85b4c8, + 0xdae2c90c354afae7, + 0x0a871e070c6a9e1d, + 0x3301b5994bbc8989, + 0x736bae3a5bdd4260, + 0x0d61ade219d59e3c, + 0x3ee7300f2685d464, + // 2^16 * 2 * G + 0xf5d255e49e7dd6b7, + 0x8016115c610b1eac, + 0x3c99975d92e187ca, + 0x13815762979125c2, + 0x43fa7947841e7518, + 0xe5c6fa59639c46d7, + 0xa1065e1de3052b74, + 0x7d47c6a2cfb89030, + 0x3fdad0148ef0d6e0, + 0x9d3e749a91546f3c, + 0x71ec621026bb8157, + 0x148cf58d34c9ec80, + // 2^16 * 3 * G + 0x46a492f67934f027, + 0x469984bef6840aa9, + 0x5ca1bc2a89611854, + 0x3ff2fa1ebd5dbbd4, + 0xe2572f7d9ae4756d, + 0x56c345bb88f3487f, + 0x9fd10b6d6960a88d, + 0x278febad4eaea1b9, + 0xb1aa681f8c933966, + 0x8c21949c20290c98, + 0x39115291219d3c52, + 0x4104dd02fe9c677b, + // 2^16 * 4 * G + 0x72b2bf5e1124422a, + 0xa1fa0c3398a33ab5, + 0x94cb6101fa52b666, + 0x2c863b00afaf53d5, + 0x81214e06db096ab8, + 0x21a8b6c90ce44f35, + 0x6524c12a409e2af5, + 0x0165b5a48efca481, + 0xf190a474a0846a76, + 0x12eff984cd2f7cc0, + 0x695e290658aa2b8f, + 0x591b67d9bffec8b8, + // 2^16 * 5 * G + 0x312f0d1c80b49bfa, + 0x5979515eabf3ec8a, + 0x727033c09ef01c88, + 0x3de02ec7ca8f7bcb, + 0x99b9b3719f18b55d, + 0xe465e5faa18c641e, + 0x61081136c29f05ed, + 0x489b4f867030128b, + 0xd232102d3aeb92ef, + 0xe16253b46116a861, + 0x3d7eabe7190baa24, + 0x49f5fbba496cbebf, + // 2^16 * 6 * G + 0x30949a108a5bcfd4, + 0xdc40dd70bc6473eb, + 0x92c294c1307c0d1c, + 0x5604a86dcbfa6e74, + 0x155d628c1e9c572e, + 0x8a4d86acc5884741, + 0x91a352f6515763eb, + 0x06a1a6c28867515b, + 0x7288d1d47c1764b6, + 0x72541140e0418b51, + 0x9f031a6018acf6d1, + 0x20989e89fe2742c6, + // 2^16 * 7 * G + 0x499777fd3a2dcc7f, + 0x32857c2ca54fd892, + 0xa279d864d207e3a0, + 0x0403ed1d0ca67e29, + 0x1674278b85eaec2e, + 0x5621dc077acb2bdf, + 0x640a4c1661cbf45a, + 0x730b9950f70595d3, + 0xc94b2d35874ec552, + 0xc5e6c8cf98246f8d, + 0xf7cb46fa16c035ce, + 0x5bd7454308303dcc, + // 2^16 * 8 * G + 0x7f9ad19528b24cc2, + 0x7f6b54656335c181, + 0x66b8b66e4fc07236, + 0x133a78007380ad83, + 0x85c4932115e7792a, + 
0xc64c89a2bdcdddc9, + 0x9d1e3da8ada3d762, + 0x5bb7db123067f82c, + 0x0961f467c6ca62be, + 0x04ec21d6211952ee, + 0x182360779bd54770, + 0x740dca6d58f0e0d2, + // 2^20 * 1 * G + 0x50b70bf5d3f0af0b, + 0x4feaf48ae32e71f7, + 0x60e84ed3a55bbd34, + 0x00ed489b3f50d1ed, + 0x3906c72aed261ae5, + 0x9ab68fd988e100f7, + 0xf5e9059af3360197, + 0x0e53dc78bf2b6d47, + 0xb90829bf7971877a, + 0x5e4444636d17e631, + 0x4d05c52e18276893, + 0x27632d9a5a4a4af5, + // 2^20 * 2 * G + 0xd11ff05154b260ce, + 0xd86dc38e72f95270, + 0x601fcd0d267cc138, + 0x2b67916429e90ccd, + 0xa98285d187eaffdb, + 0xa5b4fbbbd8d0a864, + 0xb658f27f022663f7, + 0x3bbc2b22d99ce282, + 0xb917c952583c0a58, + 0x653ff9b80fe4c6f3, + 0x9b0da7d7bcdf3c0c, + 0x43a0eeb6ab54d60e, + // 2^20 * 3 * G + 0x396966a46d4a5487, + 0xf811a18aac2bb3ba, + 0x66e4685b5628b26b, + 0x70a477029d929b92, + 0x3ac6322357875fe8, + 0xd9d4f4ecf5fbcb8f, + 0x8dee8493382bb620, + 0x50c5eaa14c799fdc, + 0xdd0edc8bd6f2fb3c, + 0x54c63aa79cc7b7a0, + 0xae0b032b2c8d9f1a, + 0x6f9ce107602967fb, + // 2^20 * 4 * G + 0xad1054b1cde1c22a, + 0xc4a8e90248eb32df, + 0x5f3e7b33accdc0ea, + 0x72364713fc79963e, + 0x139693063520e0b5, + 0x437fcf7c88ea03fe, + 0xf7d4c40bd3c959bc, + 0x699154d1f893ded9, + 0x315d5c75b4b27526, + 0xcccb842d0236daa5, + 0x22f0c8a3345fee8e, + 0x73975a617d39dbed, + // 2^20 * 5 * G + 0xe4024df96375da10, + 0x78d3251a1830c870, + 0x902b1948658cd91c, + 0x7e18b10b29b7438a, + 0x6f37f392f4433e46, + 0x0e19b9a11f566b18, + 0x220fb78a1fd1d662, + 0x362a4258a381c94d, + 0x9071d9132b6beb2f, + 0x0f26e9ad28418247, + 0xeab91ec9bdec925d, + 0x4be65bc8f48af2de, + // 2^20 * 6 * G + 0x78487feba36e7028, + 0x5f3f13001dd8ce34, + 0x934fb12d4b30c489, + 0x056c244d397f0a2b, + 0x1d50fba257c26234, + 0x7bd4823adeb0678b, + 0xc2b0dc6ea6538af5, + 0x5665eec6351da73e, + 0xdb3ee00943bfb210, + 0x4972018720800ac2, + 0x26ab5d6173bd8667, + 0x20b209c2ab204938, + // 2^20 * 7 * G + 0x549e342ac07fb34b, + 0x02d8220821373d93, + 0xbc262d70acd1f567, + 0x7a92c9fdfbcac784, + 0x1fcca94516bd3289, + 0x448d65aa41420428, + 
0x59c3b7b216a55d62, + 0x49992cc64e612cd8, + 0x65bd1bea70f801de, + 0x1befb7c0fe49e28a, + 0xa86306cdb1b2ae4a, + 0x3b7ac0cd265c2a09, + // 2^20 * 8 * G + 0x822bee438c01bcec, + 0x530cb525c0fbc73b, + 0x48519034c1953fe9, + 0x265cc261e09a0f5b, + 0xf0d54e4f22ed39a7, + 0xa2aae91e5608150a, + 0xf421b2e9eddae875, + 0x31bc531d6b7de992, + 0xdf3d134da980f971, + 0x7a4fb8d1221a22a7, + 0x3df7d42035aad6d8, + 0x2a14edcc6a1a125e, + // 2^24 * 1 * G + 0xdf48ee0752cfce4e, + 0xc3fffaf306ec08b7, + 0x05710b2ab95459c4, + 0x161d25fa963ea38d, + 0x231a8c570478433c, + 0xb7b5270ec281439d, + 0xdbaa99eae3d9079f, + 0x2c03f5256c2b03d9, + 0x790f18757b53a47d, + 0x307b0130cf0c5879, + 0x31903d77257ef7f9, + 0x699468bdbd96bbaf, + // 2^24 * 2 * G + 0xbd1f2f46f4dafecf, + 0x7cef0114a47fd6f7, + 0xd31ffdda4a47b37f, + 0x525219a473905785, + 0xd8dd3de66aa91948, + 0x485064c22fc0d2cc, + 0x9b48246634fdea2f, + 0x293e1c4e6c4a2e3a, + 0x376e134b925112e1, + 0x703778b5dca15da0, + 0xb04589af461c3111, + 0x5b605c447f032823, + // 2^24 * 3 * G + 0xb965805920c47c89, + 0xe7f0100c923b8fcc, + 0x0001256502e2ef77, + 0x24a76dcea8aeb3ee, + 0x3be9fec6f0e7f04c, + 0x866a579e75e34962, + 0x5542ef161e1de61a, + 0x2f12fef4cc5abdd5, + 0x0a4522b2dfc0c740, + 0x10d06e7f40c9a407, + 0xc6cf144178cff668, + 0x5e607b2518a43790, + // 2^24 * 4 * G + 0x58b31d8f6cdf1818, + 0x35cfa74fc36258a2, + 0xe1b3ff4f66e61d6e, + 0x5067acab6ccdd5f7, + 0xa02c431ca596cf14, + 0xe3c42d40aed3e400, + 0xd24526802e0f26db, + 0x201f33139e457068, + 0xfd527f6b08039d51, + 0x18b14964017c0006, + 0xd5220eb02e25a4a8, + 0x397cba8862460375, + // 2^24 * 5 * G + 0x30c13093f05959b2, + 0xe23aa18de9a97976, + 0x222fd491721d5e26, + 0x2339d320766e6c3a, + 0x7815c3fbc81379e7, + 0xa6619420dde12af1, + 0xffa9c0f885a8fdd5, + 0x771b4022c1e1c252, + 0xd87dd986513a2fa7, + 0xf5ac9b71f9d4cf08, + 0xd06bc31b1ea283b3, + 0x331a189219971a76, + // 2^24 * 6 * G + 0xf5166f45fb4f80c6, + 0x9c36c7de61c775cf, + 0xe3d4e81b9041d91c, + 0x31167c6b83bdfe21, + 0x26512f3a9d7572af, + 0x5bcbe28868074a9e, + 0x84edc1c11180f7c4, + 
0x1ac9619ff649a67b, + 0xf22b3842524b1068, + 0x5068343bee9ce987, + 0xfc9d71844a6250c8, + 0x612436341f08b111, + // 2^24 * 7 * G + 0xd99d41db874e898d, + 0x09fea5f16c07dc20, + 0x793d2c67d00f9bbc, + 0x46ebe2309e5eff40, + 0x8b6349e31a2d2638, + 0x9ddfb7009bd3fd35, + 0x7f8bf1b8a3a06ba4, + 0x1522aa3178d90445, + 0x2c382f5369614938, + 0xdafe409ab72d6d10, + 0xe8c83391b646f227, + 0x45fe70f50524306c, + // 2^24 * 8 * G + 0xda4875a6960c0b8c, + 0x5b68d076ef0e2f20, + 0x07fb51cf3d0b8fd4, + 0x428d1623a0e392d4, + 0x62f24920c8951491, + 0x05f007c83f630ca2, + 0x6fbb45d2f5c9d4b8, + 0x16619f6db57a2245, + 0x084f4a4401a308fd, + 0xa82219c376a5caac, + 0xdeb8de4643d1bc7d, + 0x1d81592d60bd38c6, + // 2^28 * 1 * G + 0xd833d7beec2a4c38, + 0x2c9162830acc20ed, + 0xe93a47aa92df7581, + 0x702d67a3333c4a81, + 0x3a4a369a2f89c8a1, + 0x63137a1d7c8de80d, + 0xbcac008a78eda015, + 0x2cb8b3a5b483b03f, + 0x36e417cbcb1b90a1, + 0x33b3ddaa7f11794e, + 0x3f510808885bc607, + 0x24141dc0e6a8020d, + // 2^28 * 2 * G + 0x59f73c773fefee9d, + 0xb3f1ef89c1cf989d, + 0xe35dfb42e02e545f, + 0x5766120b47a1b47c, + 0x91925dccbd83157d, + 0x3ca1205322cc8094, + 0x28e57f183f90d6e4, + 0x1a4714cede2e767b, + 0xdb20ba0fb8b6b7ff, + 0xb732c3b677511fa1, + 0xa92b51c099f02d89, + 0x4f3875ad489ca5f1, + // 2^28 * 3 * G + 0xc7fc762f4932ab22, + 0x7ac0edf72f4c3c1b, + 0x5f6b55aa9aa895e8, + 0x3680274dad0a0081, + 0x79ed13f6ee73eec0, + 0xa5c6526d69110bb1, + 0xe48928c38603860c, + 0x722a1446fd7059f5, + 0xd0959fe9a8cf8819, + 0xd0a995508475a99c, + 0x6eac173320b09cc5, + 0x628ecf04331b1095, + // 2^28 * 4 * G + 0x98bcb118a9d0ddbc, + 0xee449e3408b4802b, + 0x87089226b8a6b104, + 0x685f349a45c7915d, + 0x9b41acf85c74ccf1, + 0xb673318108265251, + 0x99c92aed11adb147, + 0x7a47d70d34ecb40f, + 0x60a0c4cbcc43a4f5, + 0x775c66ca3677bea9, + 0xa17aa1752ff8f5ed, + 0x11ded9020e01fdc0, + // 2^28 * 5 * G + 0x890e7809caefe704, + 0x8728296de30e8c6c, + 0x4c5cd2a392aeb1c9, + 0x194263d15771531f, + 0x471f95b03bea93b7, + 0x0552d7d43313abd3, + 0xbd9370e2e17e3f7b, + 0x7b120f1db20e5bec, + 
0x17d2fb3d86502d7a, + 0xb564d84450a69352, + 0x7da962c8a60ed75d, + 0x00d0f85b318736aa, + // 2^28 * 6 * G + 0x978b142e777c84fd, + 0xf402644705a8c062, + 0xa67ad51be7e612c7, + 0x2f7b459698dd6a33, + 0xa6753c1efd7621c1, + 0x69c0b4a7445671f5, + 0x971f527405b23c11, + 0x387bc74851a8c7cd, + 0x81894b4d4a52a9a8, + 0xadd93e12f6b8832f, + 0x184d8548b61bd638, + 0x3f1c62dbd6c9f6cd, + // 2^28 * 7 * G + 0x2e8f1f0091910c1f, + 0xa4df4fe0bff2e12c, + 0x60c6560aee927438, + 0x6338283facefc8fa, + 0x3fad3e40148f693d, + 0x052656e194eb9a72, + 0x2f4dcbfd184f4e2f, + 0x406f8db1c482e18b, + 0x9e630d2c7f191ee4, + 0x4fbf8301bc3ff670, + 0x787d8e4e7afb73c4, + 0x50d83d5be8f58fa5, + // 2^28 * 8 * G + 0x85683916c11a1897, + 0x2d69a4efe506d008, + 0x39af1378f664bd01, + 0x65942131361517c6, + 0xc0accf90b4d3b66d, + 0xa7059de561732e60, + 0x033d1f7870c6b0ba, + 0x584161cd26d946e4, + 0xbbf2b1a072d27ca2, + 0xbf393c59fbdec704, + 0xe98dbbcee262b81e, + 0x02eebd0b3029b589, + // 2^32 * 1 * G + 0x61368756a60dac5f, + 0x17e02f6aebabdc57, + 0x7f193f2d4cce0f7d, + 0x20234a7789ecdcf0, + 0x8765b69f7b85c5e8, + 0x6ff0678bd168bab2, + 0x3a70e77c1d330f9b, + 0x3a5f6d51b0af8e7c, + 0x76d20db67178b252, + 0x071c34f9d51ed160, + 0xf62a4a20b3e41170, + 0x7cd682353cffe366, + // 2^32 * 2 * G + 0x0be1a45bd887fab6, + 0x2a846a32ba403b6e, + 0xd9921012e96e6000, + 0x2838c8863bdc0943, + 0xa665cd6068acf4f3, + 0x42d92d183cd7e3d3, + 0x5759389d336025d9, + 0x3ef0253b2b2cd8ff, + 0xd16bb0cf4a465030, + 0xfa496b4115c577ab, + 0x82cfae8af4ab419d, + 0x21dcb8a606a82812, + // 2^32 * 3 * G + 0x5c6004468c9d9fc8, + 0x2540096ed42aa3cb, + 0x125b4d4c12ee2f9c, + 0x0bc3d08194a31dab, + 0x9a8d00fabe7731ba, + 0x8203607e629e1889, + 0xb2cc023743f3d97f, + 0x5d840dbf6c6f678b, + 0x706e380d309fe18b, + 0x6eb02da6b9e165c7, + 0x57bbba997dae20ab, + 0x3a4276232ac196dd, + // 2^32 * 4 * G + 0x4b42432c8a7084fa, + 0x898a19e3dfb9e545, + 0xbe9f00219c58e45d, + 0x1ff177cea16debd1, + 0x3bf8c172db447ecb, + 0x5fcfc41fc6282dbd, + 0x80acffc075aa15fe, + 0x0770c9e824e1a9f9, + 0xcf61d99a45b5b5fd, + 
0x860984e91b3a7924, + 0xe7300919303e3e89, + 0x39f264fd41500b1e, + // 2^32 * 5 * G + 0xa7ad3417dbe7e29c, + 0xbd94376a2b9c139c, + 0xa0e91b8e93597ba9, + 0x1712d73468889840, + 0xd19b4aabfe097be1, + 0xa46dfce1dfe01929, + 0xc3c908942ca6f1ff, + 0x65c621272c35f14e, + 0xe72b89f8ce3193dd, + 0x4d103356a125c0bb, + 0x0419a93d2e1cfe83, + 0x22f9800ab19ce272, + // 2^32 * 6 * G + 0x605a368a3e9ef8cb, + 0xe3e9c022a5504715, + 0x553d48b05f24248f, + 0x13f416cd647626e5, + 0x42029fdd9a6efdac, + 0xb912cebe34a54941, + 0x640f64b987bdf37b, + 0x4171a4d38598cab4, + 0xfa2758aa99c94c8c, + 0x23006f6fb000b807, + 0xfbd291ddadda5392, + 0x508214fa574bd1ab, + // 2^32 * 7 * G + 0xc20269153ed6fe4b, + 0xa65a6739511d77c4, + 0xcbde26462c14af94, + 0x22f960ec6faba74b, + 0x461a15bb53d003d6, + 0xb2102888bcf3c965, + 0x27c576756c683a5a, + 0x3a7758a4c86cb447, + 0x548111f693ae5076, + 0x1dae21df1dfd54a6, + 0x12248c90f3115e65, + 0x5d9fd15f8de7f494, + // 2^32 * 8 * G + 0x031408d36d63727f, + 0x6a379aefd7c7b533, + 0xa9e18fc5ccaee24b, + 0x332f35914f8fbed3, + 0x3f244d2aeed7521e, + 0x8e3a9028432e9615, + 0xe164ba772e9c16d4, + 0x3bc187fa47eb98d8, + 0x6d470115ea86c20c, + 0x998ab7cb6c46d125, + 0xd77832b53a660188, + 0x450d81ce906fba03, + // 2^36 * 1 * G + 0xf8ae4d2ad8453902, + 0x7018058ee8db2d1d, + 0xaab3995fc7d2c11e, + 0x53b16d2324ccca79, + 0x23264d66b2cae0b5, + 0x7dbaed33ebca6576, + 0x030ebed6f0d24ac8, + 0x2a887f78f7635510, + 0x2a23b9e75c012d4f, + 0x0c974651cae1f2ea, + 0x2fb63273675d70ca, + 0x0ba7250b864403f5, + // 2^36 * 2 * G + 0xbb0d18fd029c6421, + 0xbc2d142189298f02, + 0x8347f8e68b250e96, + 0x7b9f2fe8032d71c9, + 0xdd63589386f86d9c, + 0x61699176e13a85a4, + 0x2e5111954eaa7d57, + 0x32c21b57fb60bdfb, + 0xd87823cd319e0780, + 0xefc4cfc1897775c5, + 0x4854fb129a0ab3f7, + 0x12c49d417238c371, + // 2^36 * 3 * G + 0x0950b533ffe83769, + 0x21861c1d8e1d6bd1, + 0xf022d8381302e510, + 0x2509200c6391cab4, + 0x09b3a01783799542, + 0x626dd08faad5ee3f, + 0xba00bceeeb70149f, + 0x1421b246a0a444c9, + 0x4aa43a8e8c24a7c7, + 0x04c1f540d8f05ef5, + 
0xadba5e0c0b3eb9dc, + 0x2ab5504448a49ce3, + // 2^36 * 4 * G + 0x2ed227266f0f5dec, + 0x9824ee415ed50824, + 0x807bec7c9468d415, + 0x7093bae1b521e23f, + 0xdc07ac631c5d3afa, + 0x58615171f9df8c6c, + 0x72a079d89d73e2b0, + 0x7301f4ceb4eae15d, + 0x6409e759d6722c41, + 0xa674e1cf72bf729b, + 0xbc0a24eb3c21e569, + 0x390167d24ebacb23, + // 2^36 * 5 * G + 0x27f58e3bba353f1c, + 0x4c47764dbf6a4361, + 0xafbbc4e56e562650, + 0x07db2ee6aae1a45d, + 0xd7bb054ba2f2120b, + 0xe2b9ceaeb10589b7, + 0x3fe8bac8f3c0edbe, + 0x4cbd40767112cb69, + 0x0b603cc029c58176, + 0x5988e3825cb15d61, + 0x2bb61413dcf0ad8d, + 0x7b8eec6c74183287, + // 2^36 * 6 * G + 0xe4ca40782cd27cb0, + 0xdaf9c323fbe967bd, + 0xb29bd34a8ad41e9e, + 0x72810497626ede4d, + 0x32fee570fc386b73, + 0xda8b0141da3a8cc7, + 0x975ffd0ac8968359, + 0x6ee809a1b132a855, + 0x9444bb31fcfd863a, + 0x2fe3690a3e4e48c5, + 0xdc29c867d088fa25, + 0x13bd1e38d173292e, + // 2^36 * 7 * G + 0xd32b4cd8696149b5, + 0xe55937d781d8aab7, + 0x0bcb2127ae122b94, + 0x41e86fcfb14099b0, + 0x223fb5cf1dfac521, + 0x325c25316f554450, + 0x030b98d7659177ac, + 0x1ed018b64f88a4bd, + 0x3630dfa1b802a6b0, + 0x880f874742ad3bd5, + 0x0af90d6ceec5a4d4, + 0x746a247a37cdc5d9, + // 2^36 * 8 * G + 0xd531b8bd2b7b9af6, + 0x5005093537fc5b51, + 0x232fcf25c593546d, + 0x20a365142bb40f49, + 0x6eccd85278d941ed, + 0x2254ae83d22f7843, + 0xc522d02e7bbfcdb7, + 0x681e3351bff0e4e2, + 0x8b64b59d83034f45, + 0x2f8b71f21fa20efb, + 0x69249495ba6550e4, + 0x539ef98e45d5472b, + // 2^40 * 1 * G + 0x6e7bb6a1a6205275, + 0xaa4f21d7413c8e83, + 0x6f56d155e88f5cb2, + 0x2de25d4ba6345be1, + 0xd074d8961cae743f, + 0xf86d18f5ee1c63ed, + 0x97bdc55be7f4ed29, + 0x4cbad279663ab108, + 0x80d19024a0d71fcd, + 0xc525c20afb288af8, + 0xb1a3974b5f3a6419, + 0x7d7fbcefe2007233, + // 2^40 * 2 * G + 0xfaef1e6a266b2801, + 0x866c68c4d5739f16, + 0xf68a2fbc1b03762c, + 0x5975435e87b75a8d, + 0xcd7c5dc5f3c29094, + 0xc781a29a2a9105ab, + 0x80c61d36421c3058, + 0x4f9cd196dcd8d4d7, + 0x199297d86a7b3768, + 0xd0d058241ad17a63, + 0xba029cad5c1c0c17, + 
0x7ccdd084387a0307, + // 2^40 * 3 * G + 0xdca6422c6d260417, + 0xae153d50948240bd, + 0xa9c0c1b4fb68c677, + 0x428bd0ed61d0cf53, + 0x9b0c84186760cc93, + 0xcdae007a1ab32a99, + 0xa88dec86620bda18, + 0x3593ca848190ca44, + 0x9213189a5e849aa7, + 0xd4d8c33565d8facd, + 0x8c52545b53fdbbd1, + 0x27398308da2d63e6, + // 2^40 * 4 * G + 0x42c38d28435ed413, + 0xbd50f3603278ccc9, + 0xbb07ab1a79da03ef, + 0x269597aebe8c3355, + 0xb9a10e4c0a702453, + 0x0fa25866d57d1bde, + 0xffb9d9b5cd27daf7, + 0x572c2945492c33fd, + 0xc77fc745d6cd30be, + 0xe4dfe8d3e3baaefb, + 0xa22c8830aa5dda0c, + 0x7f985498c05bca80, + // 2^40 * 5 * G + 0x3849ce889f0be117, + 0x8005ad1b7b54a288, + 0x3da3c39f23fc921c, + 0x76c2ec470a31f304, + 0xd35615520fbf6363, + 0x08045a45cf4dfba6, + 0xeec24fbc873fa0c2, + 0x30f2653cd69b12e7, + 0x8a08c938aac10c85, + 0x46179b60db276bcb, + 0xa920c01e0e6fac70, + 0x2f1273f1596473da, + // 2^40 * 6 * G + 0x4739fc7c8ae01e11, + 0xfd5274904a6aab9f, + 0x41d98a8287728f2e, + 0x5d9e572ad85b69f2, + 0x30488bd755a70bc0, + 0x06d6b5a4f1d442e7, + 0xead1a69ebc596162, + 0x38ac1997edc5f784, + 0x0666b517a751b13b, + 0x747d06867e9b858c, + 0xacacc011454dde49, + 0x22dfcd9cbfe9e69c, + // 2^40 * 7 * G + 0x8ddbd2e0c30d0cd9, + 0xad8e665facbb4333, + 0x8f6b258c322a961f, + 0x6b2916c05448c1c7, + 0x56ec59b4103be0a1, + 0x2ee3baecd259f969, + 0x797cb29413f5cd32, + 0x0fe9877824cde472, + 0x7edb34d10aba913b, + 0x4ea3cd822e6dac0e, + 0x66083dff6578f815, + 0x4c303f307ff00a17, + // 2^40 * 8 * G + 0xd30a3bd617b28c85, + 0xc5d377b739773bea, + 0xc6c6e78c1e6a5cbf, + 0x0d61b8f78b2ab7c4, + 0x29fc03580dd94500, + 0xecd27aa46fbbec93, + 0x130a155fc2e2a7f8, + 0x416b151ab706a1d5, + 0x56a8d7efe9c136b0, + 0xbd07e5cd58e44b20, + 0xafe62fda1b57e0ab, + 0x191a2af74277e8d2, + // 2^44 * 1 * G + 0xd550095bab6f4985, + 0x04f4cd5b4fbfaf1a, + 0x9d8e2ed12a0c7540, + 0x2bc24e04b2212286, + 0x09d4b60b2fe09a14, + 0xc384f0afdbb1747e, + 0x58e2ea8978b5fd6e, + 0x519ef577b5e09b0a, + 0x1863d7d91124cca9, + 0x7ac08145b88a708e, + 0x2bcd7309857031f5, + 0x62337a6e8ab8fae5, + // 
2^44 * 2 * G + 0x4bcef17f06ffca16, + 0xde06e1db692ae16a, + 0x0753702d614f42b0, + 0x5f6041b45b9212d0, + 0xd1ab324e1b3a1273, + 0x18947cf181055340, + 0x3b5d9567a98c196e, + 0x7fa00425802e1e68, + 0x7d531574028c2705, + 0x80317d69db0d75fe, + 0x30fface8ef8c8ddd, + 0x7e9de97bb6c3e998, + // 2^44 * 3 * G + 0x1558967b9e6585a3, + 0x97c99ce098e98b92, + 0x10af149b6eb3adad, + 0x42181fe8f4d38cfa, + 0xf004be62a24d40dd, + 0xba0659910452d41f, + 0x81c45ee162a44234, + 0x4cb829d8a22266ef, + 0x1dbcaa8407b86681, + 0x081f001e8b26753b, + 0x3cd7ce6a84048e81, + 0x78af11633f25f22c, + // 2^44 * 4 * G + 0x8416ebd40b50babc, + 0x1508722628208bee, + 0xa3148fafb9c1c36d, + 0x0d07daacd32d7d5d, + 0x3241c00e7d65318c, + 0xe6bee5dcd0e86de7, + 0x118b2dc2fbc08c26, + 0x680d04a7fc603dc3, + 0xf9c2414a695aa3eb, + 0xdaa42c4c05a68f21, + 0x7c6c23987f93963e, + 0x210e8cd30c3954e3, + // 2^44 * 5 * G + 0xac4201f210a71c06, + 0x6a65e0aef3bfb021, + 0xbc42c35c393632f7, + 0x56ea8db1865f0742, + 0x2b50f16137fe6c26, + 0xe102bcd856e404d8, + 0x12b0f1414c561f6b, + 0x51b17bc8d028ec91, + 0xfff5fb4bcf535119, + 0xf4989d79df1108a0, + 0xbdfcea659a3ba325, + 0x18a11f1174d1a6f2, + // 2^44 * 6 * G + 0x407375ab3f6bba29, + 0x9ec3b6d8991e482e, + 0x99c80e82e55f92e9, + 0x307c13b6fb0c0ae1, + 0xfbd63cdad27a5f2c, + 0xf00fc4bc8aa106d7, + 0x53fb5c1a8e64a430, + 0x04eaabe50c1a2e85, + 0x24751021cb8ab5e7, + 0xfc2344495c5010eb, + 0x5f1e717b4e5610a1, + 0x44da5f18c2710cd5, + // 2^44 * 7 * G + 0x033cc55ff1b82eb5, + 0xb15ae36d411cae52, + 0xba40b6198ffbacd3, + 0x768edce1532e861f, + 0x9156fe6b89d8eacc, + 0xe6b79451e23126a1, + 0xbd7463d93944eb4e, + 0x726373f6767203ae, + 0xe305ca72eb7ef68a, + 0x662cf31f70eadb23, + 0x18f026fdb4c45b68, + 0x513b5384b5d2ecbd, + // 2^44 * 8 * G + 0x46d46280c729989e, + 0x4b93fbd05368a5dd, + 0x63df3f81d1765a89, + 0x34cebd64b9a0a223, + 0x5e2702878af34ceb, + 0x900b0409b946d6ae, + 0x6512ebf7dabd8512, + 0x61d9b76988258f81, + 0xa6c5a71349b7d94b, + 0xa3f3d15823eb9446, + 0x0416fbd277484834, + 0x69d45e6f2c70812f, + // 2^48 * 1 * G + 
0xce16f74bc53c1431, + 0x2b9725ce2072edde, + 0xb8b9c36fb5b23ee7, + 0x7e2e0e450b5cc908, + 0x9fe62b434f460efb, + 0xded303d4a63607d6, + 0xf052210eb7a0da24, + 0x237e7dbe00545b93, + 0x013575ed6701b430, + 0x231094e69f0bfd10, + 0x75320f1583e47f22, + 0x71afa699b11155e3, + // 2^48 * 2 * G + 0x65ce6f9b3953b61d, + 0xc65839eaafa141e6, + 0x0f435ffda9f759fe, + 0x021142e9c2b1c28e, + 0xea423c1c473b50d6, + 0x51e87a1f3b38ef10, + 0x9b84bf5fb2c9be95, + 0x00731fbc78f89a1c, + 0xe430c71848f81880, + 0xbf960c225ecec119, + 0xb6dae0836bba15e3, + 0x4c4d6f3347e15808, + // 2^48 * 3 * G + 0x18f7eccfc17d1fc9, + 0x6c75f5a651403c14, + 0xdbde712bf7ee0cdf, + 0x193fddaaa7e47a22, + 0x2f0cddfc988f1970, + 0x6b916227b0b9f51b, + 0x6ec7b6c4779176be, + 0x38bf9500a88f9fa8, + 0x1fd2c93c37e8876f, + 0xa2f61e5a18d1462c, + 0x5080f58239241276, + 0x6a6fb99ebf0d4969, + // 2^48 * 4 * G + 0x6a46c1bb560855eb, + 0x2416bb38f893f09d, + 0xd71d11378f71acc1, + 0x75f76914a31896ea, + 0xeeb122b5b6e423c6, + 0x939d7010f286ff8e, + 0x90a92a831dcf5d8c, + 0x136fda9f42c5eb10, + 0xf94cdfb1a305bdd1, + 0x0f364b9d9ff82c08, + 0x2a87d8a5c3bb588a, + 0x022183510be8dcba, + // 2^48 * 5 * G + 0x4af766385ead2d14, + 0xa08ed880ca7c5830, + 0x0d13a6e610211e3d, + 0x6a071ce17b806c03, + 0x9d5a710143307a7f, + 0xb063de9ec47da45f, + 0x22bbfe52be927ad3, + 0x1387c441fd40426c, + 0xb5d3c3d187978af8, + 0x722b5a3d7f0e4413, + 0x0d7b4848bb477ca0, + 0x3171b26aaf1edc92, + // 2^48 * 6 * G + 0xa92f319097564ca8, + 0xff7bb84c2275e119, + 0x4f55fe37a4875150, + 0x221fd4873cf0835a, + 0xa60db7d8b28a47d1, + 0xa6bf14d61770a4f1, + 0xd4a1f89353ddbd58, + 0x6c514a63344243e9, + 0x2322204f3a156341, + 0xfb73e0e9ba0a032d, + 0xfce0dd4c410f030e, + 0x48daa596fb924aaa, + // 2^48 * 7 * G + 0x6eca8e665ca59cc7, + 0xa847254b2e38aca0, + 0x31afc708d21e17ce, + 0x676dd6fccad84af7, + 0x14f61d5dc84c9793, + 0x9941f9e3ef418206, + 0xcdf5b88f346277ac, + 0x58c837fa0e8a79a9, + 0x0cf9688596fc9058, + 0x1ddcbbf37b56a01b, + 0xdcc2e77d4935d66a, + 0x1c4f73f2c6a57f0a, + // 2^48 * 8 * G + 0x0e7a4fbd305fa0bb, + 
0x829d4ce054c663ad, + 0xf421c3832fe33848, + 0x795ac80d1bf64c42, + 0xb36e706efc7c3484, + 0x73dfc9b4c3c1cf61, + 0xeb1d79c9781cc7e5, + 0x70459adb7daf675c, + 0x1b91db4991b42bb3, + 0x572696234b02dcca, + 0x9fdf9ee51f8c78dc, + 0x5fe162848ce21fd3, + // 2^52 * 1 * G + 0xe2790aae4d077c41, + 0x8b938270db7469a3, + 0x6eb632dc8abd16a2, + 0x720814ecaa064b72, + 0x315c29c795115389, + 0xd7e0e507862f74ce, + 0x0c4a762185927432, + 0x72de6c984a25a1e4, + 0xae9ab553bf6aa310, + 0x050a50a9806d6e1b, + 0x92bb7403adff5139, + 0x0394d27645be618b, + // 2^52 * 2 * G + 0x4d572251857eedf4, + 0xe3724edde19e93c5, + 0x8a71420e0b797035, + 0x3b3c833687abe743, + 0xf5396425b23545a4, + 0x15a7a27e98fbb296, + 0xab6c52bc636fdd86, + 0x79d995a8419334ee, + 0xcd8a8ea61195dd75, + 0xa504d8a81dd9a82f, + 0x540dca81a35879b6, + 0x60dd16a379c86a8a, + // 2^52 * 3 * G + 0x35a2c8487381e559, + 0x596ffea6d78082cb, + 0xcb9771ebdba7b653, + 0x5a08b5019b4da685, + 0x3501d6f8153e47b8, + 0xb7a9675414a2f60c, + 0x112ee8b6455d9523, + 0x4e62a3c18112ea8a, + 0xc8d4ac04516ab786, + 0x595af3215295b23d, + 0xd6edd234db0230c1, + 0x0929efe8825b41cc, + // 2^52 * 4 * G + 0x5f0601d1cbd0f2d3, + 0x736e412f6132bb7f, + 0x83604432238dde87, + 0x1e3a5272f5c0753c, + 0x8b3172b7ad56651d, + 0x01581b7a3fabd717, + 0x2dc94df6424df6e4, + 0x30376e5d2c29284f, + 0xd2918da78159a59c, + 0x6bdc1cd93f0713f3, + 0x565f7a934acd6590, + 0x53daacec4cb4c128, + // 2^52 * 5 * G + 0x4ca73bd79cc8a7d6, + 0x4d4a738f47e9a9b2, + 0xf4cbf12942f5fe00, + 0x01a13ff9bdbf0752, + 0x99852bc3852cfdb0, + 0x2cc12e9559d6ed0b, + 0x70f9e2bf9b5ac27b, + 0x4f3b8c117959ae99, + 0x55b6c9c82ff26412, + 0x1ac4a8c91fb667a8, + 0xd527bfcfeb778bf2, + 0x303337da7012a3be, + // 2^52 * 6 * G + 0x955422228c1c9d7c, + 0x01fac1371a9b340f, + 0x7e8d9177925b48d7, + 0x53f8ad5661b3e31b, + 0x976d3ccbfad2fdd1, + 0xcb88839737a640a8, + 0x2ff00c1d6734cb25, + 0x269ff4dc789c2d2b, + 0x0c003fbdc08d678d, + 0x4d982fa37ead2b17, + 0xc07e6bcdb2e582f1, + 0x296c7291df412a44, + // 2^52 * 7 * G + 0x7903de2b33daf397, + 0xd0ff0619c9a624b3, + 
0x8a1d252b555b3e18, + 0x2b6d581c52e0b7c0, + 0xdfb23205dab8b59e, + 0x465aeaa0c8092250, + 0xd133c1189a725d18, + 0x2327370261f117d1, + 0x3d0543d3623e7986, + 0x679414c2c278a354, + 0xae43f0cc726196f6, + 0x7836c41f8245eaba, + // 2^52 * 8 * G + 0xe7a254db49e95a81, + 0x5192d5d008b0ad73, + 0x4d20e5b1d00afc07, + 0x5d55f8012cf25f38, + 0xca651e848011937c, + 0xc6b0c46e6ef41a28, + 0xb7021ba75f3f8d52, + 0x119dff99ead7b9fd, + 0x43eadfcbf4b31d4d, + 0xc6503f7411148892, + 0xfeee68c5060d3b17, + 0x329293b3dd4a0ac8, + // 2^56 * 1 * G + 0x4e59214fe194961a, + 0x49be7dc70d71cd4f, + 0x9300cfd23b50f22d, + 0x4789d446fc917232, + 0x2879852d5d7cb208, + 0xb8dedd70687df2e7, + 0xdc0bffab21687891, + 0x2b44c043677daa35, + 0x1a1c87ab074eb78e, + 0xfac6d18e99daf467, + 0x3eacbbcd484f9067, + 0x60c52eef2bb9a4e4, + // 2^56 * 2 * G + 0x0b5d89bc3bfd8bf1, + 0xb06b9237c9f3551a, + 0x0e4c16b0d53028f5, + 0x10bc9c312ccfcaab, + 0x702bc5c27cae6d11, + 0x44c7699b54a48cab, + 0xefbc4056ba492eb2, + 0x70d77248d9b6676d, + 0xaa8ae84b3ec2a05b, + 0x98699ef4ed1781e0, + 0x794513e4708e85d1, + 0x63755bd3a976f413, + // 2^56 * 3 * G + 0xb55fa03e2ad10853, + 0x356f75909ee63569, + 0x9ff9f1fdbe69b890, + 0x0d8cc1c48bc16f84, + 0x3dc7101897f1acb7, + 0x5dda7d5ec165bbd8, + 0x508e5b9c0fa1020f, + 0x2763751737c52a56, + 0x029402d36eb419a9, + 0xf0b44e7e77b460a5, + 0xcfa86230d43c4956, + 0x70c2dd8a7ad166e7, + // 2^56 * 4 * G + 0x656194509f6fec0e, + 0xee2e7ea946c6518d, + 0x9733c1f367e09b5c, + 0x2e0fac6363948495, + 0x91d4967db8ed7e13, + 0x74252f0ad776817a, + 0xe40982e00d852564, + 0x32b8613816a53ce5, + 0x79e7f7bee448cd64, + 0x6ac83a67087886d0, + 0xf89fd4d9a0e4db2e, + 0x4179215c735a4f41, + // 2^56 * 5 * G + 0x8c7094e7d7dced2a, + 0x97fb8ac347d39c70, + 0xe13be033a906d902, + 0x700344a30cd99d76, + 0xe4ae33b9286bcd34, + 0xb7ef7eb6559dd6dc, + 0x278b141fb3d38e1f, + 0x31fa85662241c286, + 0xaf826c422e3622f4, + 0xc12029879833502d, + 0x9bc1b7e12b389123, + 0x24bb2312a9952489, + // 2^56 * 6 * G + 0xb1a8ed1732de67c3, + 0x3cb49418461b4948, + 0x8ebd434376cfbcd2, + 
0x0fee3e871e188008, + 0x41f80c2af5f85c6b, + 0x687284c304fa6794, + 0x8945df99a3ba1bad, + 0x0d1d2af9ffeb5d16, + 0xa9da8aa132621edf, + 0x30b822a159226579, + 0x4004197ba79ac193, + 0x16acd79718531d76, + // 2^56 * 7 * G + 0x72df72af2d9b1d3d, + 0x63462a36a432245a, + 0x3ecea07916b39637, + 0x123e0ef6b9302309, + 0xc959c6c57887b6ad, + 0x94e19ead5f90feba, + 0x16e24e62a342f504, + 0x164ed34b18161700, + 0x487ed94c192fe69a, + 0x61ae2cea3a911513, + 0x877bf6d3b9a4de27, + 0x78da0fc61073f3eb, + // 2^56 * 8 * G + 0x5bf15d28e52bc66a, + 0x2c47e31870f01a8e, + 0x2419afbc06c28bdd, + 0x2d25deeb256b173a, + 0xa29f80f1680c3a94, + 0x71f77e151ae9e7e6, + 0x1100f15848017973, + 0x054aa4b316b38ddd, + 0xdfc8468d19267cb8, + 0x0b28789c66e54daf, + 0x2aeb1d2a666eec17, + 0x134610a6ab7da760, + // 2^60 * 1 * G + 0xcaf55ec27c59b23f, + 0x99aeed3e154d04f2, + 0x68441d72e14141f4, + 0x140345133932a0a2, + 0xd91430e0dc028c3c, + 0x0eb955a85217c771, + 0x4b09e1ed2c99a1fa, + 0x42881af2bd6a743c, + 0x7bfec69aab5cad3d, + 0xc23e8cd34cb2cfad, + 0x685dd14bfb37d6a2, + 0x0ad6d64415677a18, + // 2^60 * 2 * G + 0x781a439e417becb5, + 0x4ac5938cd10e0266, + 0x5da385110692ac24, + 0x11b065a2ade31233, + 0x7914892847927e9f, + 0x33dad6ef370aa877, + 0x1f8f24fa11122703, + 0x5265ac2f2adf9592, + 0x405fdd309afcb346, + 0xd9723d4428e63f54, + 0x94c01df05f65aaae, + 0x43e4dc3ae14c0809, + // 2^60 * 3 * G + 0xbc12c7f1a938a517, + 0x473028ab3180b2e1, + 0x3f78571efbcd254a, + 0x74e534426ff6f90f, + 0xea6f7ac3adc2c6a3, + 0xd0e928f6e9717c94, + 0xe2d379ead645eaf5, + 0x46dd8785c51ffbbe, + 0x709801be375c8898, + 0x4b06dab5e3fd8348, + 0x75880ced27230714, + 0x2b09468fdd2f4c42, + // 2^60 * 4 * G + 0x97c749eeb701cb96, + 0x83f438d4b6a369c3, + 0x62962b8b9a402cd9, + 0x6976c7509888df7b, + 0x5b97946582ffa02a, + 0xda096a51fea8f549, + 0xa06351375f77af9b, + 0x1bcfde61201d1e76, + 0x4a4a5490246a59a2, + 0xd63ebddee87fdd90, + 0xd9437c670d2371fa, + 0x69e87308d30f8ed6, + // 2^60 * 5 * G + 0x435a8bb15656beb0, + 0xf8fac9ba4f4d5bca, + 0xb9b278c41548c075, + 0x3eb0ef76e892b622, + 
0x0f80bf028bc80303, + 0x6aae16b37a18cefb, + 0xdd47ea47d72cd6a3, + 0x61943588f4ed39aa, + 0xd26e5c3e91039f85, + 0xc0e9e77df6f33aa9, + 0xe8968c5570066a93, + 0x3c34d1881faaaddd, + // 2^60 * 6 * G + 0x3f9d2b5ea09f9ec0, + 0x1dab3b6fb623a890, + 0xa09ba3ea72d926c4, + 0x374193513fd8b36d, + 0xbd5b0b8f2fffe0d9, + 0x6aa254103ed24fb9, + 0x2ac7d7bcb26821c4, + 0x605b394b60dca36a, + 0xb4e856e45a9d1ed2, + 0xefe848766c97a9a2, + 0xb104cf641e5eee7d, + 0x2f50b81c88a71c8f, + // 2^60 * 7 * G + 0x31723c61fc6811bb, + 0x9cb450486211800f, + 0x768933d347995753, + 0x3491a53502752fcd, + 0x2b552ca0a7da522a, + 0x3230b336449b0250, + 0xf2c4c5bca4b99fb9, + 0x7b2c674958074a22, + 0xd55165883ed28cdf, + 0x12d84fd2d362de39, + 0x0a874ad3e3378e4f, + 0x000d2b1f7c763e74, + // 2^60 * 8 * G + 0x3d420811d06d4a67, + 0xbefc048590e0ffe3, + 0xf870c6b7bd487bde, + 0x6e2a7316319afa28, + 0x9624778c3e94a8ab, + 0x0ad6f3cee9a78bec, + 0x948ac7810d743c4f, + 0x76627935aaecfccc, + 0x56a8ac24d6d59a9f, + 0xc8db753e3096f006, + 0x477f41e68f4c5299, + 0x588d851cf6c86114, + // 2^64 * 1 * G + 0x51138ec78df6b0fe, + 0x5397da89e575f51b, + 0x09207a1d717af1b9, + 0x2102fdba2b20d650, + 0xcd2a65e777d1f515, + 0x548991878faa60f1, + 0xb1b73bbcdabc06e5, + 0x654878cba97cc9fb, + 0x969ee405055ce6a1, + 0x36bca7681251ad29, + 0x3a1af517aa7da415, + 0x0ad725db29ecb2ba, + // 2^64 * 2 * G + 0xdc4267b1834e2457, + 0xb67544b570ce1bc5, + 0x1af07a0bf7d15ed7, + 0x4aefcffb71a03650, + 0xfec7bc0c9b056f85, + 0x537d5268e7f5ffd7, + 0x77afc6624312aefa, + 0x4f675f5302399fd9, + 0xc32d36360415171e, + 0xcd2bef118998483b, + 0x870a6eadd0945110, + 0x0bccbb72a2a86561, + // 2^64 * 3 * G + 0x185e962feab1a9c8, + 0x86e7e63565147dcd, + 0xb092e031bb5b6df2, + 0x4024f0ab59d6b73e, + 0x186d5e4c50fe1296, + 0xe0397b82fee89f7e, + 0x3bc7f6c5507031b0, + 0x6678fd69108f37c2, + 0x1586fa31636863c2, + 0x07f68c48572d33f2, + 0x4f73cc9f789eaefc, + 0x2d42e2108ead4701, + // 2^64 * 4 * G + 0x97f5131594dfd29b, + 0x6155985d313f4c6a, + 0xeba13f0708455010, + 0x676b2608b8d2d322, + 0x21717b0d0f537593, + 
0x914e690b131e064c, + 0x1bb687ae752ae09f, + 0x420bf3a79b423c6e, + 0x8138ba651c5b2b47, + 0x8671b6ec311b1b80, + 0x7bff0cb1bc3135b0, + 0x745d2ffa9c0cf1e0, + // 2^64 * 5 * G + 0xbf525a1e2bc9c8bd, + 0xea5b260826479d81, + 0xd511c70edf0155db, + 0x1ae23ceb960cf5d0, + 0x6036df5721d34e6a, + 0xb1db8827997bb3d0, + 0xd3c209c3c8756afa, + 0x06e15be54c1dc839, + 0x5b725d871932994a, + 0x32351cb5ceb1dab0, + 0x7dc41549dab7ca05, + 0x58ded861278ec1f7, + // 2^64 * 6 * G + 0xd8173793f266c55c, + 0xc8c976c5cc454e49, + 0x5ce382f8bc26c3a8, + 0x2ff39de85485f6f9, + 0x2dfb5ba8b6c2c9a8, + 0x48eeef8ef52c598c, + 0x33809107f12d1573, + 0x08ba696b531d5bd8, + 0x77ed3eeec3efc57a, + 0x04e05517d4ff4811, + 0xea3d7a3ff1a671cb, + 0x120633b4947cfe54, + // 2^64 * 7 * G + 0x0b94987891610042, + 0x4ee7b13cecebfae8, + 0x70be739594f0a4c0, + 0x35d30a99b4d59185, + 0x82bd31474912100a, + 0xde237b6d7e6fbe06, + 0xe11e761911ea79c6, + 0x07433be3cb393bde, + 0xff7944c05ce997f4, + 0x575d3de4b05c51a3, + 0x583381fd5a76847c, + 0x2d873ede7af6da9f, + // 2^64 * 8 * G + 0x157a316443373409, + 0xfab8b7eef4aa81d9, + 0xb093fee6f5a64806, + 0x2e773654707fa7b6, + 0xaa6202e14e5df981, + 0xa20d59175015e1f5, + 0x18a275d3bae21d6c, + 0x0543618a01600253, + 0x0deabdf4974c23c1, + 0xaa6f0a259dce4693, + 0x04202cb8a29aba2c, + 0x4b1443362d07960d, + // 2^68 * 1 * G + 0x47b837f753242cec, + 0x256dc48cc04212f2, + 0xe222fbfbe1d928c5, + 0x48ea295bad8a2c07, + 0x299b1c3f57c5715e, + 0x96cb929e6b686d90, + 0x3004806447235ab3, + 0x2c435c24a44d9fe1, + 0x0607c97c80f8833f, + 0x0e851578ca25ec5b, + 0x54f7450b161ebb6f, + 0x7bcb4792a0def80e, + // 2^68 * 2 * G + 0x8487e3d02bc73659, + 0x4baf8445059979df, + 0xd17c975adcad6fbf, + 0x57369f0bdefc96b6, + 0x1cecd0a0045224c2, + 0x757f1b1b69e53952, + 0x775b7a925289f681, + 0x1b6cc62016736148, + 0xf1a9990175638698, + 0x353dd1beeeaa60d3, + 0x849471334c9ba488, + 0x63fa6e6843ade311, + // 2^68 * 3 * G + 0xd15c20536597c168, + 0x9f73740098d28789, + 0x18aee7f13257ba1f, + 0x3418bfda07346f14, + 0x2195becdd24b5eb7, + 0x5e41f18cc0cd44f9, + 
0xdf28074441ca9ede, + 0x07073b98f35b7d67, + 0xd03c676c4ce530d4, + 0x0b64c0473b5df9f4, + 0x065cef8b19b3a31e, + 0x3084d661533102c9, + // 2^68 * 4 * G + 0xe1f6b79ebf8469ad, + 0x15801004e2663135, + 0x9a498330af74181b, + 0x3ba2504f049b673c, + 0x9a6ce876760321fd, + 0x7fe2b5109eb63ad8, + 0x00e7d4ae8ac80592, + 0x73d86b7abb6f723a, + 0x0b52b5606dba5ab6, + 0xa9134f0fbbb1edab, + 0x30a9520d9b04a635, + 0x6813b8f37973e5db, + // 2^68 * 5 * G + 0x9854b054334127c1, + 0x105d047882fbff25, + 0xdb49f7f944186f4f, + 0x1768e838bed0b900, + 0xf194ca56f3157e29, + 0x136d35705ef528a5, + 0xdd4cef778b0599bc, + 0x7d5472af24f833ed, + 0xd0ef874daf33da47, + 0x00d3be5db6e339f9, + 0x3f2a8a2f9c9ceece, + 0x5d1aeb792352435a, + // 2^68 * 6 * G + 0xf59e6bb319cd63ca, + 0x670c159221d06839, + 0xb06d565b2150cab6, + 0x20fb199d104f12a3, + 0x12c7bfaeb61ba775, + 0xb84e621fe263bffd, + 0x0b47a5c35c840dcf, + 0x7e83be0bccaf8634, + 0x61943dee6d99c120, + 0x86101f2e460b9fe0, + 0x6bb2f1518ee8598d, + 0x76b76289fcc475cc, + // 2^68 * 7 * G + 0x791b4cc1756286fa, + 0xdbced317d74a157c, + 0x7e732421ea72bde6, + 0x01fe18491131c8e9, + 0x4245f1a1522ec0b3, + 0x558785b22a75656d, + 0x1d485a2548a1b3c0, + 0x60959eccd58fe09f, + 0x3ebfeb7ba8ed7a09, + 0x49fdc2bbe502789c, + 0x44ebce5d3c119428, + 0x35e1eb55be947f4a, + // 2^68 * 8 * G + 0xdbdae701c5738dd3, + 0xf9c6f635b26f1bee, + 0x61e96a8042f15ef4, + 0x3aa1d11faf60a4d8, + 0x14fd6dfa726ccc74, + 0x3b084cfe2f53b965, + 0xf33ae4f552a2c8b4, + 0x59aab07a0d40166a, + 0x77bcec4c925eac25, + 0x1848718460137738, + 0x5b374337fea9f451, + 0x1865e78ec8e6aa46, + // 2^72 * 1 * G + 0xccc4b7c7b66e1f7a, + 0x44157e25f50c2f7e, + 0x3ef06dfc713eaf1c, + 0x582f446752da63f7, + 0x967c54e91c529ccb, + 0x30f6269264c635fb, + 0x2747aff478121965, + 0x17038418eaf66f5c, + 0xc6317bd320324ce4, + 0xa81042e8a4488bc4, + 0xb21ef18b4e5a1364, + 0x0c2a1c4bcda28dc9, + // 2^72 * 2 * G + 0xd24dc7d06f1f0447, + 0xb2269e3edb87c059, + 0xd15b0272fbb2d28f, + 0x7c558bd1c6f64877, + 0xedc4814869bd6945, + 0x0d6d907dbe1c8d22, + 0xc63bd212d55cc5ab, + 
0x5a6a9b30a314dc83, + 0xd0ec1524d396463d, + 0x12bb628ac35a24f0, + 0xa50c3a791cbc5fa4, + 0x0404a5ca0afbafc3, + // 2^72 * 3 * G + 0x8c1f40070aa743d6, + 0xccbad0cb5b265ee8, + 0x574b046b668fd2de, + 0x46395bfdcadd9633, + 0x62bc9e1b2a416fd1, + 0xb5c6f728e350598b, + 0x04343fd83d5d6967, + 0x39527516e7f8ee98, + 0x117fdb2d1a5d9a9c, + 0x9c7745bcd1005c2a, + 0xefd4bef154d56fea, + 0x76579a29e822d016, + // 2^72 * 4 * G + 0x45b68e7e49c02a17, + 0x23cd51a2bca9a37f, + 0x3ed65f11ec224c1b, + 0x43a384dc9e05bdb1, + 0x333cb51352b434f2, + 0xd832284993de80e1, + 0xb5512887750d35ce, + 0x02c514bb2a2777c1, + 0x684bd5da8bf1b645, + 0xfb8bd37ef6b54b53, + 0x313916d7a9b0d253, + 0x1160920961548059, + // 2^72 * 5 * G + 0xb44d166929dacfaa, + 0xda529f4c8413598f, + 0xe9ef63ca453d5559, + 0x351e125bc5698e0b, + 0x7a385616369b4dcd, + 0x75c02ca7655c3563, + 0x7dc21bf9d4f18021, + 0x2f637d7491e6e042, + 0xd4b49b461af67bbe, + 0xd603037ac8ab8961, + 0x71dee19ff9a699fb, + 0x7f182d06e7ce2a9a, + // 2^72 * 6 * G + 0x7a7c8e64ab0168ec, + 0xcb5a4a5515edc543, + 0x095519d347cd0eda, + 0x67d4ac8c343e93b0, + 0x09454b728e217522, + 0xaa58e8f4d484b8d8, + 0xd358254d7f46903c, + 0x44acc043241c5217, + 0x1c7d6bbb4f7a5777, + 0x8b35fed4918313e1, + 0x4adca1c6c96b4684, + 0x556d1c8312ad71bd, + // 2^72 * 7 * G + 0x17ef40e30c8d3982, + 0x31f7073e15a3fa34, + 0x4f21f3cb0773646e, + 0x746c6c6d1d824eff, + 0x81f06756b11be821, + 0x0faff82310a3f3dd, + 0xf8b2d0556a99465d, + 0x097abe38cc8c7f05, + 0x0c49c9877ea52da4, + 0x4c4369559bdc1d43, + 0x022c3809f7ccebd2, + 0x577e14a34bee84bd, + // 2^72 * 8 * G + 0xf0e268ac61a73b0a, + 0xf2fafa103791a5f5, + 0xc1e13e826b6d00e9, + 0x60fa7ee96fd78f42, + 0x94fecebebd4dd72b, + 0xf46a4fda060f2211, + 0x124a5977c0c8d1ff, + 0x705304b8fb009295, + 0xb63d1d354d296ec6, + 0xf3c3053e5fad31d8, + 0x670b958cb4bd42ec, + 0x21398e0ca16353fd, + // 2^76 * 1 * G + 0x216ab2ca8da7d2ef, + 0x366ad9dd99f42827, + 0xae64b9004fdd3c75, + 0x403a395b53909e62, + 0x86c5fc16861b7e9a, + 0xf6a330476a27c451, + 0x01667267a1e93597, + 0x05ffb9cd6082dfeb, + 
0xa617fa9ff53f6139, + 0x60f2b5e513e66cb6, + 0xd7a8beefb3448aa4, + 0x7a2932856f5ea192, + // 2^76 * 2 * G + 0x0b39d761b02de888, + 0x5f550e7ed2414e1f, + 0xa6bfa45822e1a940, + 0x050a2f7dfd447b99, + 0xb89c444879639302, + 0x4ae4f19350c67f2c, + 0xf0b35da8c81af9c6, + 0x39d0003546871017, + 0x437c3b33a650db77, + 0x6bafe81dbac52bb2, + 0xfe99402d2db7d318, + 0x2b5b7eec372ba6ce, + // 2^76 * 3 * G + 0xb3bc4bbd83f50eef, + 0x508f0c998c927866, + 0x43e76587c8b7e66e, + 0x0f7655a3a47f98d9, + 0xa694404d613ac8f4, + 0x500c3c2bfa97e72c, + 0x874104d21fcec210, + 0x1b205fb38604a8ee, + 0x55ecad37d24b133c, + 0x441e147d6038c90b, + 0x656683a1d62c6fee, + 0x0157d5dc87e0ecae, + // 2^76 * 4 * G + 0xf2a7af510354c13d, + 0xd7a0b145aa372b60, + 0x2869b96a05a3d470, + 0x6528e42d82460173, + 0x95265514d71eb524, + 0xe603d8815df14593, + 0x147cdf410d4de6b7, + 0x5293b1730437c850, + 0x23d0e0814bccf226, + 0x92c745cd8196fb93, + 0x8b61796c59541e5b, + 0x40a44df0c021f978, + // 2^76 * 5 * G + 0xdaa869894f20ea6a, + 0xea14a3d14c620618, + 0x6001fccb090bf8be, + 0x35f4e822947e9cf0, + 0x86c96e514bc5d095, + 0xf20d4098fca6804a, + 0x27363d89c826ea5d, + 0x39ca36565719cacf, + 0x97506f2f6f87b75c, + 0xc624aea0034ae070, + 0x1ec856e3aad34dd6, + 0x055b0be0e440e58f, + // 2^76 * 6 * G + 0x6469a17d89735d12, + 0xdb6f27d5e662b9f1, + 0x9fcba3286a395681, + 0x363b8004d269af25, + 0x4d12a04b6ea33da2, + 0x57cf4c15e36126dd, + 0x90ec9675ee44d967, + 0x64ca348d2a985aac, + 0x99588e19e4c4912d, + 0xefcc3b4e1ca5ce6b, + 0x4522ea60fa5b98d5, + 0x7064bbab1de4a819, + // 2^76 * 7 * G + 0xb919e1515a770641, + 0xa9a2e2c74e7f8039, + 0x7527250b3df23109, + 0x756a7330ac27b78b, + 0xa290c06142542129, + 0xf2e2c2aebe8d5b90, + 0xcf2458db76abfe1b, + 0x02157ade83d626bf, + 0x3e46972a1b9a038b, + 0x2e4ee66a7ee03fb4, + 0x81a248776edbb4ca, + 0x1a944ee88ecd0563, + // 2^76 * 8 * G + 0xd5a91d1151039372, + 0x2ed377b799ca26de, + 0xa17202acfd366b6b, + 0x0730291bd6901995, + 0xbb40a859182362d6, + 0xb99f55778a4d1abb, + 0x8d18b427758559f6, + 0x26c20fe74d26235a, + 0x648d1d9fe9cc22f5, + 
0x66bc561928dd577c, + 0x47d3ed21652439d1, + 0x49d271acedaf8b49, + // 2^80 * 1 * G + 0x89f5058a382b33f3, + 0x5ae2ba0bad48c0b4, + 0x8f93b503a53db36e, + 0x5aa3ed9d95a232e6, + 0x2798aaf9b4b75601, + 0x5eac72135c8dad72, + 0xd2ceaa6161b7a023, + 0x1bbfb284e98f7d4e, + 0x656777e9c7d96561, + 0xcb2b125472c78036, + 0x65053299d9506eee, + 0x4a07e14e5e8957cc, + // 2^80 * 2 * G + 0x4ee412cb980df999, + 0xa315d76f3c6ec771, + 0xbba5edde925c77fd, + 0x3f0bac391d313402, + 0x240b58cdc477a49b, + 0xfd38dade6447f017, + 0x19928d32a7c86aad, + 0x50af7aed84afa081, + 0x6e4fde0115f65be5, + 0x29982621216109b2, + 0x780205810badd6d9, + 0x1921a316baebd006, + // 2^80 * 3 * G + 0x89422f7edfb870fc, + 0x2c296beb4f76b3bd, + 0x0738f1d436c24df7, + 0x6458df41e273aeb0, + 0xd75aad9ad9f3c18b, + 0x566a0eef60b1c19c, + 0x3e9a0bac255c0ed9, + 0x7b049deca062c7f5, + 0xdccbe37a35444483, + 0x758879330fedbe93, + 0x786004c312c5dd87, + 0x6093dccbc2950e64, + // 2^80 * 4 * G + 0x1ff39a8585e0706d, + 0x36d0a5d8b3e73933, + 0x43b9f2e1718f453b, + 0x57d1ea084827a97c, + 0x6bdeeebe6084034b, + 0x3199c2b6780fb854, + 0x973376abb62d0695, + 0x6e3180c98b647d90, + 0xee7ab6e7a128b071, + 0xa4c1596d93a88baa, + 0xf7b4de82b2216130, + 0x363e999ddd97bd18, + // 2^80 * 5 * G + 0x96a843c135ee1fc4, + 0x976eb35508e4c8cf, + 0xb42f6801b58cd330, + 0x48ee9b78693a052b, + 0x2f1848dce24baec6, + 0x769b7255babcaf60, + 0x90cb3c6e3cefe931, + 0x231f979bc6f9b355, + 0x5c31de4bcc2af3c6, + 0xb04bb030fe208d1f, + 0xb78d7009c14fb466, + 0x079bfa9b08792413, + // 2^80 * 6 * G + 0xe3903a51da300df4, + 0x843964233da95ab0, + 0xed3cf12d0b356480, + 0x038c77f684817194, + 0xf3c9ed80a2d54245, + 0x0aa08b7877f63952, + 0xd76dac63d1085475, + 0x1ef4fb159470636b, + 0x854e5ee65b167bec, + 0x59590a4296d0cdc2, + 0x72b2df3498102199, + 0x575ee92a4a0bff56, + // 2^80 * 7 * G + 0xd4c080908a182fcf, + 0x30e170c299489dbd, + 0x05babd5752f733de, + 0x43d4e7112cd3fd00, + 0x5d46bc450aa4d801, + 0xc3af1227a533b9d8, + 0x389e3b262b8906c2, + 0x200a1e7e382f581b, + 0x518db967eaf93ac5, + 0x71bc989b056652c0, + 
0xfe2b85d9567197f5, + 0x050eca52651e4e38, + // 2^80 * 8 * G + 0xc3431ade453f0c9c, + 0xe9f5045eff703b9b, + 0xfcd97ac9ed847b3d, + 0x4b0ee6c21c58f4c6, + 0x97ac397660e668ea, + 0x9b19bbfe153ab497, + 0x4cb179b534eca79f, + 0x6151c09fa131ae57, + 0x3af55c0dfdf05d96, + 0xdd262ee02ab4ee7a, + 0x11b2bb8712171709, + 0x1fef24fa800f030b, + // 2^84 * 1 * G + 0xb496123a6b6c6609, + 0xa750fe8580ab5938, + 0xf471bf39b7c27a5f, + 0x507903ce77ac193c, + 0xff91a66a90166220, + 0xf22552ae5bf1e009, + 0x7dff85d87f90df7c, + 0x4f620ffe0c736fb9, + 0x62f90d65dfde3e34, + 0xcf28c592b9fa5fad, + 0x99c86ef9c6164510, + 0x25d448044a256c84, + // 2^84 * 2 * G + 0xbd68230ec7e9b16f, + 0x0eb1b9c1c1c5795d, + 0x7943c8c495b6b1ff, + 0x2f9faf620bbacf5e, + 0x2c7c4415c9022b55, + 0x56a0d241812eb1fe, + 0xf02ea1c9d7b65e0d, + 0x4180512fd5323b26, + 0xa4ff3e698a48a5db, + 0xba6a3806bd95403b, + 0x9f7ce1af47d5b65d, + 0x15e087e55939d2fb, + // 2^84 * 3 * G + 0x12207543745c1496, + 0xdaff3cfdda38610c, + 0xe4e797272c71c34f, + 0x39c07b1934bdede9, + 0x8894186efb963f38, + 0x48a00e80dc639bd5, + 0xa4e8092be96c1c99, + 0x5a097d54ca573661, + 0x2d45892b17c9e755, + 0xd033fd7289308df8, + 0x6c2fe9d9525b8bd9, + 0x2edbecf1c11cc079, + // 2^84 * 4 * G + 0x1616a4e3c715a0d2, + 0x53623cb0f8341d4d, + 0x96ef5329c7e899cb, + 0x3d4e8dbba668baa6, + 0xee0f0fddd087a25f, + 0x9c7531555c3e34ee, + 0x660c572e8fab3ab5, + 0x0854fc44544cd3b2, + 0x61eba0c555edad19, + 0x24b533fef0a83de6, + 0x3b77042883baa5f8, + 0x678f82b898a47e8d, + // 2^84 * 5 * G + 0xb1491d0bd6900c54, + 0x3539722c9d132636, + 0x4db928920b362bc9, + 0x4d7cd1fea68b69df, + 0x1e09d94057775696, + 0xeed1265c3cd951db, + 0xfa9dac2b20bce16f, + 0x0f7f76e0e8d089f4, + 0x36d9ebc5d485b00c, + 0xa2596492e4adb365, + 0xc1659480c2119ccd, + 0x45306349186e0d5f, + // 2^84 * 6 * G + 0x94ddd0c1a6cdff1d, + 0x55f6f115e84213ae, + 0x6c935f85992fcf6a, + 0x067ee0f54a37f16f, + 0x96a414ec2b072491, + 0x1bb2218127a7b65b, + 0x6d2849596e8a4af0, + 0x65f3b08ccd27765f, + 0xecb29fff199801f7, + 0x9d361d1fa2a0f72f, + 0x25f11d2375fd2f49, + 
0x124cefe80fe10fe2, + // 2^84 * 7 * G + 0x4c126cf9d18df255, + 0xc1d471e9147a63b6, + 0x2c6d3c73f3c93b5f, + 0x6be3a6a2e3ff86a2, + 0x1518e85b31b16489, + 0x8faadcb7db710bfb, + 0x39b0bdf4a14ae239, + 0x05f4cbea503d20c1, + 0xce040e9ec04145bc, + 0xc71ff4e208f6834c, + 0xbd546e8dab8847a3, + 0x64666aa0a4d2aba5, + // 2^84 * 8 * G + 0x6841435a7c06d912, + 0xca123c21bb3f830b, + 0xd4b37b27b1cbe278, + 0x1d753b84c76f5046, + 0xb0c53bf73337e94c, + 0x7cb5697e11e14f15, + 0x4b84abac1930c750, + 0x28dd4abfe0640468, + 0x7dc0b64c44cb9f44, + 0x18a3e1ace3925dbf, + 0x7a3034862d0457c4, + 0x4c498bf78a0c892e, + // 2^88 * 1 * G + 0x37d653fb1aa73196, + 0x0f9495303fd76418, + 0xad200b09fb3a17b2, + 0x544d49292fc8613e, + 0x22d2aff530976b86, + 0x8d90b806c2d24604, + 0xdca1896c4de5bae5, + 0x28005fe6c8340c17, + 0x6aefba9f34528688, + 0x5c1bff9425107da1, + 0xf75bbbcd66d94b36, + 0x72e472930f316dfa, + // 2^88 * 2 * G + 0x2695208c9781084f, + 0xb1502a0b23450ee1, + 0xfd9daea603efde02, + 0x5a9d2e8c2733a34c, + 0x07f3f635d32a7627, + 0x7aaa4d865f6566f0, + 0x3c85e79728d04450, + 0x1fee7f000fe06438, + 0x765305da03dbf7e5, + 0xa4daf2491434cdbd, + 0x7b4ad5cdd24a88ec, + 0x00f94051ee040543, + // 2^88 * 3 * G + 0x8d356b23c3d330b2, + 0xf21c8b9bb0471b06, + 0xb36c316c6e42b83c, + 0x07d79c7e8beab10d, + 0xd7ef93bb07af9753, + 0x583ed0cf3db766a7, + 0xce6998bf6e0b1ec5, + 0x47b7ffd25dd40452, + 0x87fbfb9cbc08dd12, + 0x8a066b3ae1eec29b, + 0x0d57242bdb1fc1bf, + 0x1c3520a35ea64bb6, + // 2^88 * 4 * G + 0x80d253a6bccba34a, + 0x3e61c3a13838219b, + 0x90c3b6019882e396, + 0x1c3d05775d0ee66f, + 0xcda86f40216bc059, + 0x1fbb231d12bcd87e, + 0xb4956a9e17c70990, + 0x38750c3b66d12e55, + 0x692ef1409422e51a, + 0xcbc0c73c2b5df671, + 0x21014fe7744ce029, + 0x0621e2c7d330487c, + // 2^88 * 5 * G + 0xaf9860cc8259838d, + 0x90ea48c1c69f9adc, + 0x6526483765581e30, + 0x0007d6097bd3a5bc, + 0xb7ae1796b0dbf0f3, + 0x54dfafb9e17ce196, + 0x25923071e9aaa3b4, + 0x5d8e589ca1002e9d, + 0xc0bf1d950842a94b, + 0xb2d3c363588f2e3e, + 0x0a961438bb51e2ef, + 0x1583d7783c1cbf86, + // 
2^88 * 6 * G + 0xeceea2ef5da27ae1, + 0x597c3a1455670174, + 0xc9a62a126609167a, + 0x252a5f2e81ed8f70, + 0x90034704cc9d28c7, + 0x1d1b679ef72cc58f, + 0x16e12b5fbe5b8726, + 0x4958064e83c5580a, + 0x0d2894265066e80d, + 0xfcc3f785307c8c6b, + 0x1b53da780c1112fd, + 0x079c170bd843b388, + // 2^88 * 7 * G + 0x0506ece464fa6fff, + 0xbee3431e6205e523, + 0x3579422451b8ea42, + 0x6dec05e34ac9fb00, + 0xcdd6cd50c0d5d056, + 0x9af7686dbb03573b, + 0x3ca6723ff3c3ef48, + 0x6768c0d7317b8acc, + 0x94b625e5f155c1b3, + 0x417bf3a7997b7b91, + 0xc22cbddc6d6b2600, + 0x51445e14ddcd52f4, + // 2^88 * 8 * G + 0x57502b4b3b144951, + 0x8e67ff6b444bbcb3, + 0xb8bd6927166385db, + 0x13186f31e39295c8, + 0x893147ab2bbea455, + 0x8c53a24f92079129, + 0x4b49f948be30f7a7, + 0x12e990086e4fd43d, + 0xf10c96b37fdfbb2e, + 0x9f9a935e121ceaf9, + 0xdf1136c43a5b983f, + 0x77b2e3f05d3e99af, + // 2^92 * 1 * G + 0xfd0d75879cf12657, + 0xe82fef94e53a0e29, + 0xcc34a7f05bbb4be7, + 0x0b251172a50c38a2, + 0x9532f48fcc5cd29b, + 0x2ba851bea3ce3671, + 0x32dacaa051122941, + 0x478d99d9350004f2, + 0x1d5ad94890bb02c0, + 0x50e208b10ec25115, + 0xa26a22894ef21702, + 0x4dc923343b524805, + // 2^92 * 2 * G + 0xe3828c400f8086b6, + 0x3f77e6f7979f0dc8, + 0x7ef6de304df42cb4, + 0x5265797cb6abd784, + 0x3ad3e3ebf36c4975, + 0xd75d25a537862125, + 0xe873943da025a516, + 0x6bbc7cb4c411c847, + 0x3c6f9cd1d4a50d56, + 0xb6244077c6feab7e, + 0x6ff9bf483580972e, + 0x00375883b332acfb, + // 2^92 * 3 * G + 0x0001b2cd28cb0940, + 0x63fb51a06f1c24c9, + 0xb5ad8691dcd5ca31, + 0x67238dbd8c450660, + 0xc98bec856c75c99c, + 0xe44184c000e33cf4, + 0x0a676b9bba907634, + 0x669e2cb571f379d7, + 0xcb116b73a49bd308, + 0x025aad6b2392729e, + 0xb4793efa3f55d9b1, + 0x72a1056140678bb9, + // 2^92 * 4 * G + 0xa2b6812b1cc9249d, + 0x62866eee21211f58, + 0x2cb5c5b85df10ece, + 0x03a6b259e263ae00, + 0x0d8d2909e2e505b6, + 0x98ca78abc0291230, + 0x77ef5569a9b12327, + 0x7c77897b81439b47, + 0xf1c1b5e2de331cb5, + 0x5a9f5d8e15fca420, + 0x9fa438f17bd932b1, + 0x2a381bf01c6146e7, + // 2^92 * 5 * G + 
0xac9b9879cfc811c1, + 0x8b7d29813756e567, + 0x50da4e607c70edfc, + 0x5dbca62f884400b6, + 0xf7c0be32b534166f, + 0x27e6ca6419cf70d4, + 0x934df7d7a957a759, + 0x5701461dabdec2aa, + 0x2c6747402c915c25, + 0x1bdcd1a80b0d340a, + 0x5e5601bd07b43f5f, + 0x2555b4e05539a242, + // 2^92 * 6 * G + 0x6fc09f5266ddd216, + 0xdce560a7c8e37048, + 0xec65939da2df62fd, + 0x7a869ae7e52ed192, + 0x78409b1d87e463d4, + 0xad4da95acdfb639d, + 0xec28773755259b9c, + 0x69c806e9c31230ab, + 0x7b48f57414bb3f22, + 0x68c7cee4aedccc88, + 0xed2f936179ed80be, + 0x25d70b885f77bc4b, + // 2^92 * 7 * G + 0x4151c3d9762bf4de, + 0x083f435f2745d82b, + 0x29775a2e0d23ddd5, + 0x138e3a6269a5db24, + 0x98459d29bb1ae4d4, + 0x56b9c4c739f954ec, + 0x832743f6c29b4b3e, + 0x21ea8e2798b6878a, + 0x87bef4b46a5a7b9c, + 0xd2299d1b5fc1d062, + 0x82409818dd321648, + 0x5c5abeb1e5a2e03d, + // 2^92 * 8 * G + 0x14722af4b73c2ddb, + 0xbc470c5f5a05060d, + 0x00943eac2581b02e, + 0x0e434b3b1f499c8f, + 0x02cde6de1306a233, + 0x7b5a52a2116f8ec7, + 0xe1c681f4c1163b5b, + 0x241d350660d32643, + 0x6be4404d0ebc52c7, + 0xae46233bb1a791f5, + 0x2aec170ed25db42b, + 0x1d8dfd966645d694, + // 2^96 * 1 * G + 0x296fa9c59c2ec4de, + 0xbc8b61bf4f84f3cb, + 0x1c7706d917a8f908, + 0x63b795fc7ad3255d, + 0xd598639c12ddb0a4, + 0xa5d19f30c024866b, + 0xd17c2f0358fce460, + 0x07a195152e095e8a, + 0xa8368f02389e5fc8, + 0x90433b02cf8de43b, + 0xafa1fd5dc5412643, + 0x3e8fe83d032f0137, + // 2^96 * 2 * G + 0x2f8b15b90570a294, + 0x94f2427067084549, + 0xde1c5ae161bbfd84, + 0x75ba3b797fac4007, + 0x08704c8de8efd13c, + 0xdfc51a8e33e03731, + 0xa59d5da51260cde3, + 0x22d60899a6258c86, + 0x6239dbc070cdd196, + 0x60fe8a8b6c7d8a9a, + 0xb38847bceb401260, + 0x0904d07b87779e5e, + // 2^96 * 3 * G + 0xb4ce1fd4ddba919c, + 0xcf31db3ec74c8daa, + 0x2c63cc63ad86cc51, + 0x43e2143fbc1dde07, + 0xf4322d6648f940b9, + 0x06952f0cbd2d0c39, + 0x167697ada081f931, + 0x6240aacebaf72a6c, + 0xf834749c5ba295a0, + 0xd6947c5bca37d25a, + 0x66f13ba7e7c9316a, + 0x56bdaf238db40cac, + // 2^96 * 4 * G + 0x362ab9e3f53533eb, + 
0x338568d56eb93d40, + 0x9e0e14521d5a5572, + 0x1d24a86d83741318, + 0x1310d36cc19d3bb2, + 0x062a6bb7622386b9, + 0x7c9b8591d7a14f5c, + 0x03aa31507e1e5754, + 0xf4ec7648ffd4ce1f, + 0xe045eaf054ac8c1c, + 0x88d225821d09357c, + 0x43b261dc9aeb4859, + // 2^96 * 5 * G + 0xe55b1e1988bb79bb, + 0xa09ed07dc17a359d, + 0xb02c2ee2603dea33, + 0x326055cf5b276bc2, + 0x19513d8b6c951364, + 0x94fe7126000bf47b, + 0x028d10ddd54f9567, + 0x02b4d5e242940964, + 0xb4a155cb28d18df2, + 0xeacc4646186ce508, + 0xc49cf4936c824389, + 0x27a6c809ae5d3410, + // 2^96 * 6 * G + 0x8ba6ebcd1f0db188, + 0x37d3d73a675a5be8, + 0xf22edfa315f5585a, + 0x2cb67174ff60a17e, + 0xcd2c270ac43d6954, + 0xdd4a3e576a66cab2, + 0x79fa592469d7036c, + 0x221503603d8c2599, + 0x59eecdf9390be1d0, + 0xa9422044728ce3f1, + 0x82891c667a94f0f4, + 0x7b1df4b73890f436, + // 2^96 * 7 * G + 0xe492f2e0b3b2a224, + 0x7c6c9e062b551160, + 0x15eb8fe20d7f7b0e, + 0x61fcef2658fc5992, + 0x5f2e221807f8f58c, + 0xe3555c9fd49409d4, + 0xb2aaa88d1fb6a630, + 0x68698245d352e03d, + 0xdbb15d852a18187a, + 0xf3e4aad386ddacd7, + 0x44bae2810ff6c482, + 0x46cf4c473daf01cf, + // 2^96 * 8 * G + 0x426525ed9ec4e5f9, + 0x0e5eda0116903303, + 0x72b1a7f2cbe5cadc, + 0x29387bcd14eb5f40, + 0x213c6ea7f1498140, + 0x7c1e7ef8392b4854, + 0x2488c38c5629ceba, + 0x1065aae50d8cc5bb, + 0x1c2c4525df200d57, + 0x5c3b2dd6bfca674a, + 0x0a07e7b1e1834030, + 0x69a198e64f1ce716, + // 2^100 * 1 * G + 0x7afcd613efa9d697, + 0x0cc45aa41c067959, + 0xa56fe104c1fada96, + 0x3a73b70472e40365, + 0x7b26e56b9e2d4734, + 0xc4c7132b81c61675, + 0xef5c9525ec9cde7f, + 0x39c80b16e71743ad, + 0x0f196e0d1b826c68, + 0xf71ff0e24960e3db, + 0x6113167023b7436c, + 0x0cf0ea5877da7282, + // 2^100 * 2 * G + 0x196c80a4ddd4ccbd, + 0x22e6f55d95f2dd9d, + 0xc75e33c740d6c71b, + 0x7bb51279cb3c042f, + 0xe332ced43ba6945a, + 0xde0b1361e881c05d, + 0x1ad40f095e67ed3b, + 0x5da8acdab8c63d5d, + 0xc4b6664a3a70159f, + 0x76194f0f0a904e14, + 0xa5614c39a4096c13, + 0x6cd0ff50979feced, + // 2^100 * 3 * G + 0xc0e067e78f4428ac, + 0x14835ab0a61135e3, + 
0xf21d14f338062935, + 0x6390a4c8df04849c, + 0x7fecfabdb04ba18e, + 0xd0fc7bfc3bddbcf7, + 0xa41d486e057a131c, + 0x641a4391f2223a61, + 0xc5c6b95aa606a8db, + 0x914b7f9eb06825f1, + 0x2a731f6b44fc9eff, + 0x30ddf38562705cfc, + // 2^100 * 4 * G + 0x4e3dcbdad1bff7f9, + 0xc9118e8220645717, + 0xbacccebc0f189d56, + 0x1b4822e9d4467668, + 0x33bef2bd68bcd52c, + 0xc649dbb069482ef2, + 0xb5b6ee0c41cb1aee, + 0x5c294d270212a7e5, + 0xab360a7f25563781, + 0x2512228a480f7958, + 0xc75d05276114b4e3, + 0x222d9625d976fe2a, + // 2^100 * 5 * G + 0x1c717f85b372ace1, + 0x81930e694638bf18, + 0x239cad056bc08b58, + 0x0b34271c87f8fff4, + 0x0f94be7e0a344f85, + 0xeb2faa8c87f22c38, + 0x9ce1e75e4ee16f0f, + 0x43e64e5418a08dea, + 0x8155e2521a35ce63, + 0xbe100d4df912028e, + 0xbff80bf8a57ddcec, + 0x57342dc96d6bc6e4, + // 2^100 * 6 * G + 0xefeef065c8ce5998, + 0xbf029510b5cbeaa2, + 0x8c64a10620b7c458, + 0x35134fb231c24855, + 0xf3c3bcb71e707bf6, + 0x351d9b8c7291a762, + 0x00502e6edad69a33, + 0x522f521f1ec8807f, + 0x272c1f46f9a3902b, + 0xc91ba3b799657bcc, + 0xae614b304f8a1c0e, + 0x7afcaad70b99017b, + // 2^100 * 7 * G + 0xc25ded54a4b8be41, + 0x902d13e11bb0e2dd, + 0x41f43233cde82ab2, + 0x1085faa5c3aae7cb, + 0xa88141ecef842b6b, + 0x55e7b14797abe6c5, + 0x8c748f9703784ffe, + 0x5b50a1f7afcd00b7, + 0x9b840f66f1361315, + 0x18462242701003e9, + 0x65ed45fae4a25080, + 0x0a2862393fda7320, + // 2^100 * 8 * G + 0x46ab13c8347cbc9d, + 0x3849e8d499c12383, + 0x4cea314087d64ac9, + 0x1f354134b1a29ee7, + 0x960e737b6ecb9d17, + 0xfaf24948d67ceae1, + 0x37e7a9b4d55e1b89, + 0x5cb7173cb46c59eb, + 0x4a89e68b82b7abf0, + 0xf41cd9279ba6b7b9, + 0x16e6c210e18d876f, + 0x7cacdb0f7f1b09c6, + // 2^104 * 1 * G + 0x9062b2e0d91a78bc, + 0x47c9889cc8509667, + 0x9df54a66405070b8, + 0x7369e6a92493a1bf, + 0xe1014434dcc5caed, + 0x47ed5d963c84fb33, + 0x70019576ed86a0e7, + 0x25b2697bd267f9e4, + 0x9d673ffb13986864, + 0x3ca5fbd9415dc7b8, + 0xe04ecc3bdf273b5e, + 0x1420683db54e4cd2, + // 2^104 * 2 * G + 0xb478bd1e249dd197, + 0x620c35005e58c102, + 
0xfb02d32fccbaac5c, + 0x60b63bebf508a72d, + 0x34eebb6fc1cc5ad0, + 0x6a1b0ce99646ac8b, + 0xd3b0da49a66bde53, + 0x31e83b4161d081c1, + 0x97e8c7129e062b4f, + 0x49e48f4f29320ad8, + 0x5bece14b6f18683f, + 0x55cf1eb62d550317, + // 2^104 * 3 * G + 0x5879101065c23d58, + 0x8b9d086d5094819c, + 0xe2402fa912c55fa7, + 0x669a6564570891d4, + 0x3076b5e37df58c52, + 0xd73ab9dde799cc36, + 0xbd831ce34913ee20, + 0x1a56fbaa62ba0133, + 0x943e6b505c9dc9ec, + 0x302557bba77c371a, + 0x9873ae5641347651, + 0x13c4836799c58a5c, + // 2^104 * 4 * G + 0x423a5d465ab3e1b9, + 0xfc13c187c7f13f61, + 0x19f83664ecb5b9b6, + 0x66f80c93a637b607, + 0xc4dcfb6a5d8bd080, + 0xdeebc4ec571a4842, + 0xd4b2e883b8e55365, + 0x50bdc87dc8e5b827, + 0x606d37836edfe111, + 0x32353e15f011abd9, + 0x64b03ac325b73b96, + 0x1dd56444725fd5ae, + // 2^104 * 5 * G + 0x8fa47ff83362127d, + 0xbc9f6ac471cd7c15, + 0x6e71454349220c8b, + 0x0e645912219f732e, + 0xc297e60008bac89a, + 0x7d4cea11eae1c3e0, + 0xf3e38be19fe7977c, + 0x3a3a450f63a305cd, + 0x078f2f31d8394627, + 0x389d3183de94a510, + 0xd1e36c6d17996f80, + 0x318c8d9393a9a87b, + // 2^104 * 6 * G + 0xf2745d032afffe19, + 0x0c9f3c497f24db66, + 0xbc98d3e3ba8598ef, + 0x224c7c679a1d5314, + 0x5d669e29ab1dd398, + 0xfc921658342d9e3b, + 0x55851dfdf35973cd, + 0x509a41c325950af6, + 0xbdc06edca6f925e9, + 0x793ef3f4641b1f33, + 0x82ec12809d833e89, + 0x05bff02328a11389, + // 2^104 * 7 * G + 0x3632137023cae00b, + 0x544acf0ad1accf59, + 0x96741049d21a1c88, + 0x780b8cc3fa2a44a7, + 0x6881a0dd0dc512e4, + 0x4fe70dc844a5fafe, + 0x1f748e6b8f4a5240, + 0x576277cdee01a3ea, + 0x1ef38abc234f305f, + 0x9a577fbd1405de08, + 0x5e82a51434e62a0d, + 0x5ff418726271b7a1, + // 2^104 * 8 * G + 0x398e080c1789db9d, + 0xa7602025f3e778f5, + 0xfa98894c06bd035d, + 0x106a03dc25a966be, + 0xe5db47e813b69540, + 0xf35d2a3b432610e1, + 0xac1f26e938781276, + 0x29d4db8ca0a0cb69, + 0xd9ad0aaf333353d0, + 0x38669da5acd309e5, + 0x3c57658ac888f7f0, + 0x4ab38a51052cbefa, + // 2^108 * 1 * G + 0xdfdacbee4324c0e9, + 0x054442883f955bb7, + 
0xdef7aaa8ea31609f, + 0x68aee70642287cff, + 0xf68fe2e8809de054, + 0xe3bc096a9c82bad1, + 0x076353d40aadbf45, + 0x7b9b1fb5dea1959e, + 0xf01cc8f17471cc0c, + 0x95242e37579082bb, + 0x27776093d3e46b5f, + 0x2d13d55a28bd85fb, + // 2^108 * 2 * G + 0xfac5d2065b35b8da, + 0xa8da8a9a85624bb7, + 0xccd2ca913d21cd0f, + 0x6b8341ee8bf90d58, + 0xbf019cce7aee7a52, + 0xa8ded2b6e454ead3, + 0x3c619f0b87a8bb19, + 0x3619b5d7560916d8, + 0x3579f26b0282c4b2, + 0x64d592f24fafefae, + 0xb7cded7b28c8c7c0, + 0x6a927b6b7173a8d7, + // 2^108 * 3 * G + 0x1f6db24f986e4656, + 0x1021c02ed1e9105b, + 0xf8ff3fff2cc0a375, + 0x1d2a6bf8c6c82592, + 0x8d7040863ece88eb, + 0xf0e307a980eec08c, + 0xac2250610d788fda, + 0x056d92a43a0d478d, + 0x1b05a196fc3da5a1, + 0x77d7a8c243b59ed0, + 0x06da3d6297d17918, + 0x66fbb494f12353f7, + // 2^108 * 4 * G + 0x751a50b9d85c0fb8, + 0xd1afdc258bcf097b, + 0x2f16a6a38309a969, + 0x14ddff9ee5b00659, + 0xd6d70996f12309d6, + 0xdbfb2385e9c3d539, + 0x46d602b0f7552411, + 0x270a0b0557843e0c, + 0x61ff0640a7862bcc, + 0x81cac09a5f11abfe, + 0x9047830455d12abb, + 0x19a4bde1945ae873, + // 2^108 * 5 * G + 0x9b9f26f520a6200a, + 0x64804443cf13eaf8, + 0x8a63673f8631edd3, + 0x72bbbce11ed39dc1, + 0x40c709dec076c49f, + 0x657bfaf27f3e53f6, + 0x40662331eca042c4, + 0x14b375487eb4df04, + 0xae853c94ab66dc47, + 0xeb62343edf762d6e, + 0xf08e0e186fb2f7d1, + 0x4f0b1c02700ab37a, + // 2^108 * 6 * G + 0xe1706787d81951fa, + 0xa10a2c8eb290c77b, + 0xe7382fa03ed66773, + 0x0a4d84710bcc4b54, + 0x79fd21ccc1b2e23f, + 0x4ae7c281453df52a, + 0xc8172ec9d151486b, + 0x68abe9443e0a7534, + 0xda12c6c407831dcb, + 0x0da230d74d5c510d, + 0x4ab1531e6bd404e1, + 0x4106b166bcf440ef, + // 2^108 * 7 * G + 0x02e57a421cd23668, + 0x4ad9fb5d0eaef6fd, + 0x954e6727b1244480, + 0x7f792f9d2699f331, + 0xa485ccd539e4ecf2, + 0x5aa3f3ad0555bab5, + 0x145e3439937df82d, + 0x1238b51e1214283f, + 0x0b886b925fd4d924, + 0x60906f7a3626a80d, + 0xecd367b4b98abd12, + 0x2876beb1def344cf, + // 2^108 * 8 * G + 0xdc84e93563144691, + 0x632fe8a0d61f23f4, + 
0x4caa800612a9a8d5, + 0x48f9dbfa0e9918d3, + 0xd594b3333a8a85f8, + 0x4ea37689e78d7d58, + 0x73bf9f455e8e351f, + 0x5507d7d2bc41ebb4, + 0x1ceb2903299572fc, + 0x7c8ccaa29502d0ee, + 0x91bfa43411cce67b, + 0x5784481964a831e7, + // 2^112 * 1 * G + 0xda7c2b256768d593, + 0x98c1c0574422ca13, + 0xf1a80bd5ca0ace1d, + 0x29cdd1adc088a690, + 0xd6cfd1ef5fddc09c, + 0xe82b3efdf7575dce, + 0x25d56b5d201634c2, + 0x3041c6bb04ed2b9b, + 0x0ff2f2f9d956e148, + 0xade797759f356b2e, + 0x1a4698bb5f6c025c, + 0x104bbd6814049a7b, + // 2^112 * 2 * G + 0x51f0fd3168f1ed67, + 0x2c811dcdd86f3bc2, + 0x44dc5c4304d2f2de, + 0x5be8cc57092a7149, + 0xa95d9a5fd67ff163, + 0xe92be69d4cc75681, + 0xb7f8024cde20f257, + 0x204f2a20fb072df5, + 0xc8143b3d30ebb079, + 0x7589155abd652e30, + 0x653c3c318f6d5c31, + 0x2570fb17c279161f, + // 2^112 * 3 * G + 0x3efa367f2cb61575, + 0xf5f96f761cd6026c, + 0xe8c7142a65b52562, + 0x3dcb65ea53030acd, + 0x192ea9550bb8245a, + 0xc8e6fba88f9050d1, + 0x7986ea2d88a4c935, + 0x241c5f91de018668, + 0x28d8172940de6caa, + 0x8fbf2cf022d9733a, + 0x16d7fcdd235b01d1, + 0x08420edd5fcdf0e5, + // 2^112 * 4 * G + 0xcdff20ab8362fa4a, + 0x57e118d4e21a3e6e, + 0xe3179617fc39e62b, + 0x0d9a53efbc1769fd, + 0x0358c34e04f410ce, + 0xb6135b5a276e0685, + 0x5d9670c7ebb91521, + 0x04d654f321db889c, + 0x5e7dc116ddbdb5d5, + 0x2954deb68da5dd2d, + 0x1cb608173334a292, + 0x4a7a4f2618991ad7, + // 2^112 * 5 * G + 0xf4a718025fb15f95, + 0x3df65f346b5c1b8f, + 0xcdfcf08500e01112, + 0x11b50c4cddd31848, + 0x24c3b291af372a4b, + 0x93da8270718147f2, + 0xdd84856486899ef2, + 0x4a96314223e0ee33, + 0xa6e8274408a4ffd6, + 0x738e177e9c1576d9, + 0x773348b63d02b3f2, + 0x4f4bce4dce6bcc51, + // 2^112 * 6 * G + 0xa71fce5ae2242584, + 0x26ea725692f58a9e, + 0xd21a09d71cea3cf4, + 0x73fcdd14b71c01e6, + 0x30e2616ec49d0b6f, + 0xe456718fcaec2317, + 0x48eb409bf26b4fa6, + 0x3042cee561595f37, + 0x427e7079449bac41, + 0x855ae36dbce2310a, + 0x4cae76215f841a7c, + 0x389e740c9a9ce1d6, + // 2^112 * 7 * G + 0x64fcb3ae34dcb9ce, + 0x97500323e348d0ad, + 
0x45b3f07d62c6381b, + 0x61545379465a6788, + 0xc9bd78f6570eac28, + 0xe55b0b3227919ce1, + 0x65fc3eaba19b91ed, + 0x25c425e5d6263690, + 0x3f3e06a6f1d7de6e, + 0x3ef976278e062308, + 0x8c14f6264e8a6c77, + 0x6539a08915484759, + // 2^112 * 8 * G + 0xe9d21f74c3d2f773, + 0xc150544125c46845, + 0x624e5ce8f9b99e33, + 0x11c5e4aac5cd186c, + 0xddc4dbd414bb4a19, + 0x19b2bc3c98424f8e, + 0x48a89fd736ca7169, + 0x0f65320ef019bd90, + 0xd486d1b1cafde0c6, + 0x4f3fe6e3163b5181, + 0x59a8af0dfaf2939a, + 0x4cabc7bdec33072a, + // 2^116 * 1 * G + 0x16faa8fb532f7428, + 0xdbd42ea046a4e272, + 0x5337653b8b9ea480, + 0x4065947223973f03, + 0xf7c0a19c1a54a044, + 0x4a1c5e2477bd9fbb, + 0xa6e3ca115af22972, + 0x1819bb953f2e9e0d, + 0x498fbb795e042e84, + 0x7d0dd89a7698b714, + 0x8bfb0ba427fe6295, + 0x36ba82e721200524, + // 2^116 * 2 * G + 0xd60ecbb74245ec41, + 0xfd9be89e34348716, + 0xc9240afee42284de, + 0x4472f648d0531db4, + 0xc8d69d0a57274ed5, + 0x45ba803260804b17, + 0xdf3cda102255dfac, + 0x77d221232709b339, + 0x498a6d7064ad94d8, + 0xa5b5c8fd9af62263, + 0x8ca8ed0545c141f4, + 0x2c63bec3662d358c, + // 2^116 * 3 * G + 0x7fe60d8bea787955, + 0xb9dc117eb5f401b7, + 0x91c7c09a19355cce, + 0x22692ef59442bedf, + 0x9a518b3a8586f8bf, + 0x9ee71af6cbb196f0, + 0xaa0625e6a2385cf2, + 0x1deb2176ddd7c8d1, + 0x8563d19a2066cf6c, + 0x401bfd8c4dcc7cd7, + 0xd976a6becd0d8f62, + 0x67cfd773a278b05e, + // 2^116 * 4 * G + 0x8dec31faef3ee475, + 0x99dbff8a9e22fd92, + 0x512d11594e26cab1, + 0x0cde561eec4310b9, + 0x2d5fa9855a4e586a, + 0x65f8f7a449beab7e, + 0xaa074dddf21d33d3, + 0x185cba721bcb9dee, + 0x93869da3f4e3cb41, + 0xbf0392f540f7977e, + 0x026204fcd0463b83, + 0x3ec91a769eec6eed, + // 2^116 * 5 * G + 0x1e9df75bf78166ad, + 0x4dfda838eb0cd7af, + 0xba002ed8c1eaf988, + 0x13fedb3e11f33cfc, + 0x0fad2fb7b0a3402f, + 0x46615ecbfb69f4a8, + 0xf745bcc8c5f8eaa6, + 0x7a5fa8794a94e896, + 0x52958faa13cd67a1, + 0x965ee0818bdbb517, + 0x16e58daa2e8845b3, + 0x357d397d5499da8f, + // 2^116 * 6 * G + 0x1ebfa05fb0bace6c, + 0xc934620c1caf9a1e, + 
0xcc771cc41d82b61a, + 0x2d94a16aa5f74fec, + 0x481dacb4194bfbf8, + 0x4d77e3f1bae58299, + 0x1ef4612e7d1372a0, + 0x3a8d867e70ff69e1, + 0x6f58cd5d55aff958, + 0xba3eaa5c75567721, + 0x75c123999165227d, + 0x69be1343c2f2b35e, + // 2^116 * 7 * G + 0x0e091d5ee197c92a, + 0x4f51019f2945119f, + 0x143679b9f034e99c, + 0x7d88112e4d24c696, + 0x82bbbdac684b8de3, + 0xa2f4c7d03fca0718, + 0x337f92fbe096aaa8, + 0x200d4d8c63587376, + 0x208aed4b4893b32b, + 0x3efbf23ebe59b964, + 0xd762deb0dba5e507, + 0x69607bd681bd9d94, + // 2^116 * 8 * G + 0xf6be021068de1ce1, + 0xe8d518e70edcbc1f, + 0xe3effdd01b5505a5, + 0x35f63353d3ec3fd0, + 0x3b7f3bd49323a902, + 0x7c21b5566b2c6e53, + 0xe5ba8ff53a7852a7, + 0x28bc77a5838ece00, + 0x63ba78a8e25d8036, + 0x63651e0094333490, + 0x48d82f20288ce532, + 0x3a31abfa36b57524, + // 2^120 * 1 * G + 0x239e9624089c0a2e, + 0xc748c4c03afe4738, + 0x17dbed2a764fa12a, + 0x639b93f0321c8582, + 0xc08f788f3f78d289, + 0xfe30a72ca1404d9f, + 0xf2778bfccf65cc9d, + 0x7ee498165acb2021, + 0x7bd508e39111a1c3, + 0x2b2b90d480907489, + 0xe7d2aec2ae72fd19, + 0x0edf493c85b602a6, + // 2^120 * 2 * G + 0xaecc8158599b5a68, + 0xea574f0febade20e, + 0x4fe41d7422b67f07, + 0x403b92e3019d4fb4, + 0x6767c4d284764113, + 0xa090403ff7f5f835, + 0x1c8fcffacae6bede, + 0x04c00c54d1dfa369, + 0x4dc22f818b465cf8, + 0x71a0f35a1480eff8, + 0xaee8bfad04c7d657, + 0x355bb12ab26176f4, + // 2^120 * 3 * G + 0xa71e64cc7493bbf4, + 0xe5bd84d9eca3b0c3, + 0x0a6bc50cfa05e785, + 0x0f9b8132182ec312, + 0xa301dac75a8c7318, + 0xed90039db3ceaa11, + 0x6f077cbf3bae3f2d, + 0x7518eaf8e052ad8e, + 0xa48859c41b7f6c32, + 0x0f2d60bcf4383298, + 0x1815a929c9b1d1d9, + 0x47c3871bbb1755c4, + // 2^120 * 4 * G + 0x5144539771ec4f48, + 0xf805b17dc98c5d6e, + 0xf762c11a47c3c66b, + 0x00b89b85764699dc, + 0xfbe65d50c85066b0, + 0x62ecc4b0b3a299b0, + 0xe53754ea441ae8e0, + 0x08fea02ce8d48d5f, + 0x824ddd7668deead0, + 0xc86445204b685d23, + 0xb514cfcd5d89d665, + 0x473829a74f75d537, + // 2^120 * 5 * G + 0x82d2da754679c418, + 0xe63bd7d8b2618df0, + 
0x355eef24ac47eb0a, + 0x2078684c4833c6b4, + 0x23d9533aad3902c9, + 0x64c2ddceef03588f, + 0x15257390cfe12fb4, + 0x6c668b4d44e4d390, + 0x3b48cf217a78820c, + 0xf76a0ab281273e97, + 0xa96c65a78c8eed7b, + 0x7411a6054f8a433f, + // 2^120 * 6 * G + 0x4d659d32b99dc86d, + 0x044cdc75603af115, + 0xb34c712cdcc2e488, + 0x7c136574fb8134ff, + 0x579ae53d18b175b4, + 0x68713159f392a102, + 0x8455ecba1eef35f5, + 0x1ec9a872458c398f, + 0xb8e6a4d400a2509b, + 0x9b81d7020bc882b4, + 0x57e7cc9bf1957561, + 0x3add88a5c7cd6460, + // 2^120 * 7 * G + 0xab895770b635dcf2, + 0x02dfef6cf66c1fbc, + 0x85530268beb6d187, + 0x249929fccc879e74, + 0x85c298d459393046, + 0x8f7e35985ff659ec, + 0x1d2ca22af2f66e3a, + 0x61ba1131a406a720, + 0xa3d0a0f116959029, + 0x023b6b6cba7ebd89, + 0x7bf15a3e26783307, + 0x5620310cbbd8ece7, + // 2^120 * 8 * G + 0x528993434934d643, + 0xb9dbf806a51222f5, + 0x8f6d878fc3f41c22, + 0x37676a2a4d9d9730, + 0x6646b5f477e285d6, + 0x40e8ff676c8f6193, + 0xa6ec7311abb594dd, + 0x7ec846f3658cec4d, + 0x9b5e8f3f1da22ec7, + 0x130f1d776c01cd13, + 0x214c8fcfa2989fb8, + 0x6daaf723399b9dd5, + // 2^124 * 1 * G + 0x591e4a5610628564, + 0x2a4bb87ca8b4df34, + 0xde2a2572e7a38e43, + 0x3cbdabd9fee5046e, + 0x81aebbdd2cd13070, + 0x962e4325f85a0e9e, + 0xde9391aacadffecb, + 0x53177fda52c230e6, + 0xa7bc970650b9de79, + 0x3d12a7fbc301b59b, + 0x02652e68d36ae38c, + 0x79d739835a6199dc, + // 2^124 * 2 * G + 0xd9354df64131c1bd, + 0x758094a186ec5822, + 0x4464ee12e459f3c2, + 0x6c11fce4cb133282, + 0x21c9d9920d591737, + 0x9bea41d2e9b46cd6, + 0xe20e84200d89bfca, + 0x79d99f946eae5ff8, + 0xf17b483568673205, + 0x387deae83caad96c, + 0x61b471fd56ffe386, + 0x31741195b745a599, + // 2^124 * 3 * G + 0xe8d10190b77a360b, + 0x99b983209995e702, + 0xbd4fdff8fa0247aa, + 0x2772e344e0d36a87, + 0x17f8ba683b02a047, + 0x50212096feefb6c8, + 0x70139be21556cbe2, + 0x203e44a11d98915b, + 0xd6863eba37b9e39f, + 0x105bc169723b5a23, + 0x104f6459a65c0762, + 0x567951295b4d38d4, + // 2^124 * 4 * G + 0x535fd60613037524, + 0xe210adf6b0fbc26a, + 
0xac8d0a9b23e990ae, + 0x47204d08d72fdbf9, + 0x07242eb30d4b497f, + 0x1ef96306b9bccc87, + 0x37950934d8116f45, + 0x05468d6201405b04, + 0x00f565a9f93267de, + 0xcecfd78dc0d58e8a, + 0xa215e2dcf318e28e, + 0x4599ee919b633352, + // 2^124 * 5 * G + 0xd3c220ca70e0e76b, + 0xb12bea58ea9f3094, + 0x294ddec8c3271282, + 0x0c3539e1a1d1d028, + 0xac746d6b861ae579, + 0x31ab0650f6aea9dc, + 0x241d661140256d4c, + 0x2f485e853d21a5de, + 0x329744839c0833f3, + 0x6fe6257fd2abc484, + 0x5327d1814b358817, + 0x65712585893fe9bc, + // 2^124 * 6 * G + 0x9c102fb732a61161, + 0xe48e10dd34d520a8, + 0x365c63546f9a9176, + 0x32f6fe4c046f6006, + 0x81c29f1bd708ee3f, + 0xddcb5a05ae6407d0, + 0x97aec1d7d2a3eba7, + 0x1590521a91d50831, + 0x40a3a11ec7910acc, + 0x9013dff8f16d27ae, + 0x1a9720d8abb195d4, + 0x1bb9fe452ea98463, + // 2^124 * 7 * G + 0xe9d1d950b3d54f9e, + 0x2d5f9cbee00d33c1, + 0x51c2c656a04fc6ac, + 0x65c091ee3c1cbcc9, + 0xcf5e6c95cc36747c, + 0x294201536b0bc30d, + 0x453ac67cee797af0, + 0x5eae6ab32a8bb3c9, + 0x7083661114f118ea, + 0x2b37b87b94349cad, + 0x7273f51cb4e99f40, + 0x78a2a95823d75698, + // 2^124 * 8 * G + 0xa2b072e95c8c2ace, + 0x69cffc96651e9c4b, + 0x44328ef842e7b42b, + 0x5dd996c122aadeb3, + 0xb4f23c425ef83207, + 0xabf894d3c9a934b5, + 0xd0708c1339fd87f7, + 0x1876789117166130, + 0x925b5ef0670c507c, + 0x819bc842b93c33bf, + 0x10792e9a70dd003f, + 0x59ad4b7a6e28dc74, + // 2^128 * 1 * G + 0x5f3a7562eb3dbe47, + 0xf7ea38548ebda0b8, + 0x00c3e53145747299, + 0x1304e9e71627d551, + 0x583b04bfacad8ea2, + 0x29b743e8148be884, + 0x2b1e583b0810c5db, + 0x2b5449e58eb3bbaa, + 0x789814d26adc9cfe, + 0x3c1bab3f8b48dd0b, + 0xda0fe1fff979c60a, + 0x4468de2d7c2dd693, + // 2^128 * 2 * G + 0x51bb355e9419469e, + 0x33e6dc4c23ddc754, + 0x93a5b6d6447f9962, + 0x6cce7c6ffb44bd63, + 0x4b9ad8c6f86307ce, + 0x21113531435d0c28, + 0xd4a866c5657a772c, + 0x5da6427e63247352, + 0x1a94c688deac22ca, + 0xb9066ef7bbae1ff8, + 0x88ad8c388d59580f, + 0x58f29abfe79f2ca8, + // 2^128 * 3 * G + 0xe90ecfab8de73e68, + 0x54036f9f377e76a5, + 
0xf0495b0bbe015982, + 0x577629c4a7f41e36, + 0x4b5a64bf710ecdf6, + 0xb14ce538462c293c, + 0x3643d056d50b3ab9, + 0x6af93724185b4870, + 0x3220024509c6a888, + 0xd2e036134b558973, + 0x83e236233c33289f, + 0x701f25bb0caec18f, + // 2^128 * 4 * G + 0xc3a8b0f8e4616ced, + 0xf700660e9e25a87d, + 0x61e3061ff4bca59c, + 0x2e0c92bfbdc40be9, + 0x9d18f6d97cbec113, + 0x844a06e674bfdbe4, + 0x20f5b522ac4e60d6, + 0x720a5bc050955e51, + 0x0c3f09439b805a35, + 0xe84e8b376242abfc, + 0x691417f35c229346, + 0x0e9b9cbb144ef0ec, + // 2^128 * 5 * G + 0xfbbad48ffb5720ad, + 0xee81916bdbf90d0e, + 0xd4813152635543bf, + 0x221104eb3f337bd8, + 0x8dee9bd55db1beee, + 0xc9c3ab370a723fb9, + 0x44a8f1bf1c68d791, + 0x366d44191cfd3cde, + 0x9e3c1743f2bc8c14, + 0x2eda26fcb5856c3b, + 0xccb82f0e68a7fb97, + 0x4167a4e6bc593244, + // 2^128 * 6 * G + 0x643b9d2876f62700, + 0x5d1d9d400e7668eb, + 0x1b4b430321fc0684, + 0x7938bb7e2255246a, + 0xc2be2665f8ce8fee, + 0xe967ff14e880d62c, + 0xf12e6e7e2f364eee, + 0x34b33370cb7ed2f6, + 0xcdc591ee8681d6cc, + 0xce02109ced85a753, + 0xed7485c158808883, + 0x1176fc6e2dfe65e4, + // 2^128 * 7 * G + 0xb4af6cd05b9c619b, + 0x2ddfc9f4b2a58480, + 0x3d4fa502ebe94dc4, + 0x08fc3a4c677d5f34, + 0xdb90e28949770eb8, + 0x98fbcc2aacf440a3, + 0x21354ffeded7879b, + 0x1f6a3e54f26906b6, + 0x60a4c199d30734ea, + 0x40c085b631165cd6, + 0xe2333e23f7598295, + 0x4f2fad0116b900d1, + // 2^128 * 8 * G + 0x44beb24194ae4e54, + 0x5f541c511857ef6c, + 0xa61e6b2d368d0498, + 0x445484a4972ef7ab, + 0x962cd91db73bb638, + 0xe60577aafc129c08, + 0x6f619b39f3b61689, + 0x3451995f2944ee81, + 0x9152fcd09fea7d7c, + 0x4a816c94b0935cf6, + 0x258e9aaa47285c40, + 0x10b89ca6042893b7, + // 2^132 * 1 * G + 0x9b2a426e3b646025, + 0x32127190385ce4cf, + 0xa25cffc2dd6dea45, + 0x06409010bea8de75, + 0xd67cded679d34aa0, + 0xcc0b9ec0cc4db39f, + 0xa535a456e35d190f, + 0x2e05d9eaf61f6fef, + 0xc447901ad61beb59, + 0x661f19bce5dc880a, + 0x24685482b7ca6827, + 0x293c778cefe07f26, + // 2^132 * 2 * G + 0x86809e7007069096, + 0xaad75b15e4e50189, + 
0x07f35715a21a0147, + 0x0487f3f112815d5e, + 0x16c795d6a11ff200, + 0xcb70d0e2b15815c9, + 0x89f293209b5395b5, + 0x50b8c2d031e47b4f, + 0x48350c08068a4962, + 0x6ffdd05351092c9a, + 0x17af4f4aaf6fc8dd, + 0x4b0553b53cdba58b, + // 2^132 * 3 * G + 0x9c65fcbe1b32ff79, + 0xeb75ea9f03b50f9b, + 0xfced2a6c6c07e606, + 0x35106cd551717908, + 0xbf05211b27c152d4, + 0x5ec26849bd1af639, + 0x5e0b2caa8e6fab98, + 0x054c8bdd50bd0840, + 0x38a0b12f1dcf073d, + 0x4b60a8a3b7f6a276, + 0xfed5ac25d3404f9a, + 0x72e82d5e5505c229, + // 2^132 * 4 * G + 0x6b0b697ff0d844c8, + 0xbb12f85cd979cb49, + 0xd2a541c6c1da0f1f, + 0x7b7c242958ce7211, + 0x00d9cdfd69771d02, + 0x410276cd6cfbf17e, + 0x4c45306c1cb12ec7, + 0x2857bf1627500861, + 0x9f21903f0101689e, + 0xd779dfd3bf861005, + 0xa122ee5f3deb0f1b, + 0x510df84b485a00d4, + // 2^132 * 5 * G + 0xa54133bb9277a1fa, + 0x74ec3b6263991237, + 0x1a3c54dc35d2f15a, + 0x2d347144e482ba3a, + 0x24b3c887c70ac15e, + 0xb0f3a557fb81b732, + 0x9b2cde2fe578cc1b, + 0x4cf7ed0703b54f8e, + 0x6bd47c6598fbee0f, + 0x9e4733e2ab55be2d, + 0x1093f624127610c5, + 0x4e05e26ad0a1eaa4, + // 2^132 * 6 * G + 0xda9b6b624b531f20, + 0x429a760e77509abb, + 0xdbe9f522e823cb80, + 0x618f1856880c8f82, + 0x1833c773e18fe6c0, + 0xe3c4711ad3c87265, + 0x3bfd3c4f0116b283, + 0x1955875eb4cd4db8, + 0x6da6de8f0e399799, + 0x7ad61aa440fda178, + 0xb32cd8105e3563dd, + 0x15f6beae2ae340ae, + // 2^132 * 7 * G + 0x862bcb0c31ec3a62, + 0x810e2b451138f3c2, + 0x788ec4b839dac2a4, + 0x28f76867ae2a9281, + 0xba9a0f7b9245e215, + 0xf368612dd98c0dbb, + 0x2e84e4cbf220b020, + 0x6ba92fe962d90eda, + 0x3e4df9655884e2aa, + 0xbd62fbdbdbd465a5, + 0xd7596caa0de9e524, + 0x6e8042ccb2b1b3d7, + // 2^132 * 8 * G + 0xf10d3c29ce28ca6e, + 0xbad34540fcb6093d, + 0xe7426ed7a2ea2d3f, + 0x08af9d4e4ff298b9, + 0x1530653616521f7e, + 0x660d06b896203dba, + 0x2d3989bc545f0879, + 0x4b5303af78ebd7b0, + 0x72f8a6c3bebcbde8, + 0x4f0fca4adc3a8e89, + 0x6fa9d4e8c7bfdf7a, + 0x0dcf2d679b624eb7, + // 2^136 * 1 * G + 0x3d5947499718289c, + 0x12ebf8c524533f26, + 
0x0262bfcb14c3ef15, + 0x20b878d577b7518e, + 0x753941be5a45f06e, + 0xd07caeed6d9c5f65, + 0x11776b9c72ff51b6, + 0x17d2d1d9ef0d4da9, + 0x27f2af18073f3e6a, + 0xfd3fe519d7521069, + 0x22e3b72c3ca60022, + 0x72214f63cc65c6a7, + // 2^136 * 2 * G + 0xb4e37f405307a693, + 0xaba714d72f336795, + 0xd6fbd0a773761099, + 0x5fdf48c58171cbc9, + 0x1d9db7b9f43b29c9, + 0xd605824a4f518f75, + 0xf2c072bd312f9dc4, + 0x1f24ac855a1545b0, + 0x24d608328e9505aa, + 0x4748c1d10c1420ee, + 0xc7ffe45c06fb25a2, + 0x00ba739e2ae395e6, + // 2^136 * 3 * G + 0x592e98de5c8790d6, + 0xe5bfb7d345c2a2df, + 0x115a3b60f9b49922, + 0x03283a3e67ad78f3, + 0xae4426f5ea88bb26, + 0x360679d984973bfb, + 0x5c9f030c26694e50, + 0x72297de7d518d226, + 0x48241dc7be0cb939, + 0x32f19b4d8b633080, + 0xd3dfc90d02289308, + 0x05e1296846271945, + // 2^136 * 4 * G + 0xba82eeb32d9c495a, + 0xceefc8fcf12bb97c, + 0xb02dabae93b5d1e0, + 0x39c00c9c13698d9b, + 0xadbfbbc8242c4550, + 0xbcc80cecd03081d9, + 0x843566a6f5c8df92, + 0x78cf25d38258ce4c, + 0x15ae6b8e31489d68, + 0xaa851cab9c2bf087, + 0xc9a75a97f04efa05, + 0x006b52076b3ff832, + // 2^136 * 5 * G + 0x29e0cfe19d95781c, + 0xb681df18966310e2, + 0x57df39d370516b39, + 0x4d57e3443bc76122, + 0xf5cb7e16b9ce082d, + 0x3407f14c417abc29, + 0xd4b36bce2bf4a7ab, + 0x7de2e9561a9f75ce, + 0xde70d4f4b6a55ecb, + 0x4801527f5d85db99, + 0xdbc9c440d3ee9a81, + 0x6b2a90af1a6029ed, + // 2^136 * 6 * G + 0x6923f4fc9ae61e97, + 0x5735281de03f5fd1, + 0xa764ae43e6edd12d, + 0x5fd8f4e9d12d3e4a, + 0x77ebf3245bb2d80a, + 0xd8301b472fb9079b, + 0xc647e6f24cee7333, + 0x465812c8276c2109, + 0x4d43beb22a1062d9, + 0x7065fb753831dc16, + 0x180d4a7bde2968d7, + 0x05b32c2b1cb16790, + // 2^136 * 7 * G + 0xc8c05eccd24da8fd, + 0xa1cf1aac05dfef83, + 0xdbbeeff27df9cd61, + 0x3b5556a37b471e99, + 0xf7fca42c7ad58195, + 0x3214286e4333f3cc, + 0xb6c29d0d340b979d, + 0x31771a48567307e1, + 0x32b0c524e14dd482, + 0xedb351541a2ba4b6, + 0xa3d16048282b5af3, + 0x4fc079d27a7336eb, + // 2^136 * 8 * G + 0x51c938b089bf2f7f, + 0x2497bd6502dfe9a7, + 
0xffffc09c7880e453, + 0x124567cecaf98e92, + 0xdc348b440c86c50d, + 0x1337cbc9cc94e651, + 0x6422f74d643e3cb9, + 0x241170c2bae3cd08, + 0x3ff9ab860ac473b4, + 0xf0911dee0113e435, + 0x4ae75060ebc6c4af, + 0x3f8612966c87000d, + // 2^140 * 1 * G + 0x0c9c5303f7957be4, + 0xa3c31a20e085c145, + 0xb0721d71d0850050, + 0x0aba390eab0bf2da, + 0x529fdffe638c7bf3, + 0xdf2b9e60388b4995, + 0xe027b34f1bad0249, + 0x7bc92fc9b9fa74ed, + 0x9f97ef2e801ad9f9, + 0x83697d5479afda3a, + 0xe906b3ffbd596b50, + 0x02672b37dd3fb8e0, + // 2^140 * 2 * G + 0x48b2ca8b260885e4, + 0xa4286bec82b34c1c, + 0x937e1a2617f58f74, + 0x741d1fcbab2ca2a5, + 0xee9ba729398ca7f5, + 0xeb9ca6257a4849db, + 0x29eb29ce7ec544e1, + 0x232ca21ef736e2c8, + 0xbf61423d253fcb17, + 0x08803ceafa39eb14, + 0xf18602df9851c7af, + 0x0400f3a049e3414b, + // 2^140 * 3 * G + 0xabce0476ba61c55b, + 0x36a3d6d7c4d39716, + 0x6eb259d5e8d82d09, + 0x0c9176e984d756fb, + 0x2efba412a06e7b06, + 0x146785452c8d2560, + 0xdf9713ebd67a91c7, + 0x32830ac7157eadf3, + 0x0e782a7ab73769e8, + 0x04a05d7875b18e2c, + 0x29525226ebcceae1, + 0x0d794f8383eba820, + // 2^140 * 4 * G + 0xff35f5cb9e1516f4, + 0xee805bcf648aae45, + 0xf0d73c2bb93a9ef3, + 0x097b0bf22092a6c2, + 0x7be44ce7a7a2e1ac, + 0x411fd93efad1b8b7, + 0x1734a1d70d5f7c9b, + 0x0d6592233127db16, + 0xc48bab1521a9d733, + 0xa6c2eaead61abb25, + 0x625c6c1cc6cb4305, + 0x7fc90fea93eb3a67, + // 2^140 * 5 * G + 0x0408f1fe1f5c5926, + 0x1a8f2f5e3b258bf4, + 0x40a951a2fdc71669, + 0x6598ee93c98b577e, + 0xc527deb59c7cb23d, + 0x955391695328404e, + 0xd64392817ccf2c7a, + 0x6ce97dabf7d8fa11, + 0x25b5a8e50ef7c48f, + 0xeb6034116f2ce532, + 0xc5e75173e53de537, + 0x73119fa08c12bb03, + // 2^140 * 6 * G + 0xed30129453f1a4cb, + 0xbce621c9c8f53787, + 0xfacb2b1338bee7b9, + 0x3025798a9ea8428c, + 0x7845b94d21f4774d, + 0xbf62f16c7897b727, + 0x671857c03c56522b, + 0x3cd6a85295621212, + 0x3fecde923aeca999, + 0xbdaa5b0062e8c12f, + 0x67b99dfc96988ade, + 0x3f52c02852661036, + // 2^140 * 7 * G + 0xffeaa48e2a1351c6, + 0x28624754fa7f53d7, + 
0x0b5ba9e57582ddf1, + 0x60c0104ba696ac59, + 0x9258bf99eec416c6, + 0xac8a5017a9d2f671, + 0x629549ab16dea4ab, + 0x05d0e85c99091569, + 0x051de020de9cbe97, + 0xfa07fc56b50bcf74, + 0x378cec9f0f11df65, + 0x36853c69ab96de4d, + // 2^140 * 8 * G + 0x36d9b8de78f39b2d, + 0x7f42ed71a847b9ec, + 0x241cd1d679bd3fde, + 0x6a704fec92fbce6b, + 0x4433c0b0fac5e7be, + 0x724bae854c08dcbe, + 0xf1f24cc446978f9b, + 0x4a0aff6d62825fc8, + 0xe917fb9e61095301, + 0xc102df9402a092f8, + 0xbf09e2f5fa66190b, + 0x681109bee0dcfe37, + // 2^144 * 1 * G + 0x559a0cc9782a0dde, + 0x551dcdb2ea718385, + 0x7f62865b31ef238c, + 0x504aa7767973613d, + 0x9c18fcfa36048d13, + 0x29159db373899ddd, + 0xdc9f350b9f92d0aa, + 0x26f57eee878a19d4, + 0x0cab2cd55687efb1, + 0x5180d162247af17b, + 0x85c15a344f5a2467, + 0x4041943d9dba3069, + // 2^144 * 2 * G + 0xc3c0eeba43ebcc96, + 0x8d749c9c26ea9caf, + 0xd9fa95ee1c77ccc6, + 0x1420a1d97684340f, + 0x4b217743a26caadd, + 0x47a6b424648ab7ce, + 0xcb1d4f7a03fbc9e3, + 0x12d931429800d019, + 0x00c67799d337594f, + 0x5e3c5140b23aa47b, + 0x44182854e35ff395, + 0x1b4f92314359a012, + // 2^144 * 3 * G + 0x3e5c109d89150951, + 0x39cefa912de9696a, + 0x20eae43f975f3020, + 0x239b572a7f132dae, + 0x33cf3030a49866b1, + 0x251f73d2215f4859, + 0xab82aa4051def4f6, + 0x5ff191d56f9a23f6, + 0x819ed433ac2d9068, + 0x2883ab795fc98523, + 0xef4572805593eb3d, + 0x020c526a758f36cb, + // 2^144 * 4 * G + 0x779834f89ed8dbbc, + 0xc8f2aaf9dc7ca46c, + 0xa9524cdca3e1b074, + 0x02aacc4615313877, + 0xe931ef59f042cc89, + 0x2c589c9d8e124bb6, + 0xadc8e18aaec75997, + 0x452cfe0a5602c50c, + 0x86a0f7a0647877df, + 0xbbc464270e607c9f, + 0xab17ea25f1fb11c9, + 0x4cfb7d7b304b877b, + // 2^144 * 5 * G + 0x72b43d6cb89b75fe, + 0x54c694d99c6adc80, + 0xb8c3aa373ee34c9f, + 0x14b4622b39075364, + 0xe28699c29789ef12, + 0x2b6ecd71df57190d, + 0xc343c857ecc970d0, + 0x5b1d4cbc434d3ac5, + 0xb6fb2615cc0a9f26, + 0x3a4f0e2bb88dcce5, + 0x1301498b3369a705, + 0x2f98f71258592dd1, + // 2^144 * 6 * G + 0x0c94a74cb50f9e56, + 0x5b1ff4a98e8e1320, + 
0x9a2acc2182300f67, + 0x3a6ae249d806aaf9, + 0x2e12ae444f54a701, + 0xfcfe3ef0a9cbd7de, + 0xcebf890d75835de0, + 0x1d8062e9e7614554, + 0x657ada85a9907c5a, + 0x1a0ea8b591b90f62, + 0x8d0e1dfbdf34b4e9, + 0x298b8ce8aef25ff3, + // 2^144 * 7 * G + 0x2a927953eff70cb2, + 0x4b89c92a79157076, + 0x9418457a30a7cf6a, + 0x34b8a8404d5ce485, + 0x837a72ea0a2165de, + 0x3fab07b40bcf79f6, + 0x521636c77738ae70, + 0x6ba6271803a7d7dc, + 0xc26eecb583693335, + 0xd5a813df63b5fefd, + 0xa293aa9aa4b22573, + 0x71d62bdd465e1c6a, + // 2^144 * 8 * G + 0x6533cc28d378df80, + 0xf6db43790a0fa4b4, + 0xe3645ff9f701da5a, + 0x74d5f317f3172ba4, + 0xcd2db5dab1f75ef5, + 0xd77f95cf16b065f5, + 0x14571fea3f49f085, + 0x1c333621262b2b3d, + 0xa86fe55467d9ca81, + 0x398b7c752b298c37, + 0xda6d0892e3ac623b, + 0x4aebcc4547e9d98c, + // 2^148 * 1 * G + 0x53175a7205d21a77, + 0xb0c04422d3b934d4, + 0xadd9f24bdd5deadc, + 0x074f46e69f10ff8c, + 0x0de9b204a059a445, + 0xe15cb4aa4b17ad0f, + 0xe1bbec521f79c557, + 0x2633f1b9d071081b, + 0xc1fb4177018b9910, + 0xa6ea20dc6c0fe140, + 0xd661f3e74354c6ff, + 0x5ecb72e6f1a3407a, + // 2^148 * 2 * G + 0xa515a31b2259fb4e, + 0x0960f3972bcac52f, + 0xedb52fec8d3454cb, + 0x382e2720c476c019, + 0xfeeae106e8e86997, + 0x9863337f98d09383, + 0x9470480eaa06ebef, + 0x038b6898d4c5c2d0, + 0xf391c51d8ace50a6, + 0x3142d0b9ae2d2948, + 0xdb4d5a1a7f24ca80, + 0x21aeba8b59250ea8, + // 2^148 * 3 * G + 0x24f13b34cf405530, + 0x3c44ea4a43088af7, + 0x5dd5c5170006a482, + 0x118eb8f8890b086d, + 0x53853600f0087f23, + 0x4c461879da7d5784, + 0x6af303deb41f6860, + 0x0a3c16c5c27c18ed, + 0x17e49c17cc947f3d, + 0xccc6eda6aac1d27b, + 0xdf6092ceb0f08e56, + 0x4909b3e22c67c36b, + // 2^148 * 4 * G + 0x9c9c85ea63fe2e89, + 0xbe1baf910e9412ec, + 0x8f7baa8a86fbfe7b, + 0x0fb17f9fef968b6c, + 0x59a16676706ff64e, + 0x10b953dd0d86a53d, + 0x5848e1e6ce5c0b96, + 0x2d8b78e712780c68, + 0x79d5c62eafc3902b, + 0x773a215289e80728, + 0xc38ae640e10120b9, + 0x09ae23717b2b1a6d, + // 2^148 * 5 * G + 0xbb6a192a4e4d083c, + 0x34ace0630029e192, + 
0x98245a59aafabaeb, + 0x6d9c8a9ada97faac, + 0x10ab8fa1ad32b1d0, + 0xe9aced1be2778b24, + 0xa8856bc0373de90f, + 0x66f35ddddda53996, + 0xd27d9afb24997323, + 0x1bb7e07ef6f01d2e, + 0x2ba7472df52ecc7f, + 0x03019b4f646f9dc8, + // 2^148 * 6 * G + 0x04a186b5565345cd, + 0xeee76610bcc4116a, + 0x689c73b478fb2a45, + 0x387dcbff65697512, + 0xaf09b214e6b3dc6b, + 0x3f7573b5ad7d2f65, + 0xd019d988100a23b0, + 0x392b63a58b5c35f7, + 0x4093addc9c07c205, + 0xc565be15f532c37e, + 0x63dbecfd1583402a, + 0x61722b4aef2e032e, + // 2^148 * 7 * G + 0x0012aafeecbd47af, + 0x55a266fb1cd46309, + 0xf203eb680967c72c, + 0x39633944ca3c1429, + 0xd6b07a5581cb0e3c, + 0x290ff006d9444969, + 0x08680b6a16dcda1f, + 0x5568d2b75a06de59, + 0x8d0cb88c1b37cfe1, + 0x05b6a5a3053818f3, + 0xf2e9bc04b787d959, + 0x6beba1249add7f64, + // 2^148 * 8 * G + 0x1d06005ca5b1b143, + 0x6d4c6bb87fd1cda2, + 0x6ef5967653fcffe7, + 0x097c29e8c1ce1ea5, + 0x5c3cecb943f5a53b, + 0x9cc9a61d06c08df2, + 0xcfba639a85895447, + 0x5a845ae80df09fd5, + 0x4ce97dbe5deb94ca, + 0x38d0a4388c709c48, + 0xc43eced4a169d097, + 0x0a1249fff7e587c3, + // 2^152 * 1 * G + 0x12f0071b276d01c9, + 0xe7b8bac586c48c70, + 0x5308129b71d6fba9, + 0x5d88fbf95a3db792, + 0x0b408d9e7354b610, + 0x806b32535ba85b6e, + 0xdbe63a034a58a207, + 0x173bd9ddc9a1df2c, + 0x2b500f1efe5872df, + 0x58d6582ed43918c1, + 0xe6ed278ec9673ae0, + 0x06e1cd13b19ea319, + // 2^152 * 2 * G + 0x40d0ad516f166f23, + 0x118e32931fab6abe, + 0x3fe35e14a04d088e, + 0x3080603526e16266, + 0x472baf629e5b0353, + 0x3baa0b90278d0447, + 0x0c785f469643bf27, + 0x7f3a6a1a8d837b13, + 0xf7e644395d3d800b, + 0x95a8d555c901edf6, + 0x68cd7830592c6339, + 0x30d0fded2e51307e, + // 2^152 * 3 * G + 0xe0594d1af21233b3, + 0x1bdbe78ef0cc4d9c, + 0x6965187f8f499a77, + 0x0a9214202c099868, + 0x9cb4971e68b84750, + 0xa09572296664bbcf, + 0x5c8de72672fa412b, + 0x4615084351c589d9, + 0xbc9019c0aeb9a02e, + 0x55c7110d16034cae, + 0x0e6df501659932ec, + 0x3bca0d2895ca5dfe, + // 2^152 * 4 * G + 0x40f031bc3c5d62a4, + 0x19fc8b3ecff07a60, + 
0x98183da2130fb545, + 0x5631deddae8f13cd, + 0x9c688eb69ecc01bf, + 0xf0bc83ada644896f, + 0xca2d955f5f7a9fe2, + 0x4ea8b4038df28241, + 0x2aed460af1cad202, + 0x46305305a48cee83, + 0x9121774549f11a5f, + 0x24ce0930542ca463, + // 2^152 * 5 * G + 0x1fe890f5fd06c106, + 0xb5c468355d8810f2, + 0x827808fe6e8caf3e, + 0x41d4e3c28a06d74b, + 0x3fcfa155fdf30b85, + 0xd2f7168e36372ea4, + 0xb2e064de6492f844, + 0x549928a7324f4280, + 0xf26e32a763ee1a2e, + 0xae91e4b7d25ffdea, + 0xbc3bd33bd17f4d69, + 0x491b66dec0dcff6a, + // 2^152 * 6 * G + 0x98f5b13dc7ea32a7, + 0xe3d5f8cc7e16db98, + 0xac0abf52cbf8d947, + 0x08f338d0c85ee4ac, + 0x75f04a8ed0da64a1, + 0xed222caf67e2284b, + 0x8234a3791f7b7ba4, + 0x4cf6b8b0b7018b67, + 0xc383a821991a73bd, + 0xab27bc01df320c7a, + 0xc13d331b84777063, + 0x530d4a82eb078a99, + // 2^152 * 7 * G + 0x004c3630e1f94825, + 0x7e2d78268cab535a, + 0xc7482323cc84ff8b, + 0x65ea753f101770b9, + 0x6d6973456c9abf9e, + 0x257fb2fc4900a880, + 0x2bacf412c8cfb850, + 0x0db3e7e00cbfbd5b, + 0x3d66fc3ee2096363, + 0x81d62c7f61b5cb6b, + 0x0fbe044213443b1a, + 0x02a4ec1921e1a1db, + // 2^152 * 8 * G + 0x5ce6259a3b24b8a2, + 0xb8577acc45afa0b8, + 0xcccbe6e88ba07037, + 0x3d143c51127809bf, + 0xf5c86162f1cf795f, + 0x118c861926ee57f2, + 0x172124851c063578, + 0x36d12b5dec067fcf, + 0x126d279179154557, + 0xd5e48f5cfc783a0a, + 0x36bdb6e8df179bac, + 0x2ef517885ba82859, + // 2^156 * 1 * G + 0x88bd438cd11e0d4a, + 0x30cb610d43ccf308, + 0xe09a0e3791937bcc, + 0x4559135b25b1720c, + 0x1ea436837c6da1e9, + 0xf9c189af1fb9bdbe, + 0x303001fcce5dd155, + 0x28a7c99ebc57be52, + 0xb8fd9399e8d19e9d, + 0x908191cb962423ff, + 0xb2b948d747c742a3, + 0x37f33226d7fb44c4, + // 2^156 * 2 * G + 0x0dae8767b55f6e08, + 0x4a43b3b35b203a02, + 0xe3725a6e80af8c79, + 0x0f7a7fd1705fa7a3, + 0x33912553c821b11d, + 0x66ed42c241e301df, + 0x066fcc11104222fd, + 0x307a3b41c192168f, + 0x8eeb5d076eb55ce0, + 0x2fc536bfaa0d925a, + 0xbe81830fdcb6c6e8, + 0x556c7045827baf52, + // 2^156 * 3 * G + 0x8e2b517302e9d8b7, + 0xe3e52269248714e8, + 
0xbd4fbd774ca960b5, + 0x6f4b4199c5ecada9, + 0xb94b90022bf44406, + 0xabd4237eff90b534, + 0x7600a960faf86d3a, + 0x2f45abdac2322ee3, + 0x61af4912c8ef8a6a, + 0xe58fa4fe43fb6e5e, + 0xb5afcc5d6fd427cf, + 0x6a5393281e1e11eb, + // 2^156 * 4 * G + 0xf3da5139a5d1ee89, + 0x8145457cff936988, + 0x3f622fed00e188c4, + 0x0f513815db8b5a3d, + 0x0fff04fe149443cf, + 0x53cac6d9865cddd7, + 0x31385b03531ed1b7, + 0x5846a27cacd1039d, + 0x4ff5cdac1eb08717, + 0x67e8b29590f2e9bc, + 0x44093b5e237afa99, + 0x0d414bed8708b8b2, + // 2^156 * 5 * G + 0xcfb68265fd0e75f6, + 0xe45b3e28bb90e707, + 0x7242a8de9ff92c7a, + 0x685b3201933202dd, + 0x81886a92294ac9e8, + 0x23162b45d55547be, + 0x94cfbc4403715983, + 0x50eb8fdb134bc401, + 0xc0b73ec6d6b330cd, + 0x84e44807132faff1, + 0x732b7352c4a5dee1, + 0x5d7c7cf1aa7cd2d2, + // 2^156 * 6 * G + 0xaf3b46bf7a4aafa2, + 0xb78705ec4d40d411, + 0x114f0c6aca7c15e3, + 0x3f364faaa9489d4d, + 0x33d1013e9b73a562, + 0x925cef5748ec26e1, + 0xa7fce614dd468058, + 0x78b0fad41e9aa438, + 0xbf56a431ed05b488, + 0xa533e66c9c495c7e, + 0xe8652baf87f3651a, + 0x0241800059d66c33, + // 2^156 * 7 * G + 0xceb077fea37a5be4, + 0xdb642f02e5a5eeb7, + 0xc2e6d0c5471270b8, + 0x4771b65538e4529c, + 0x28350c7dcf38ea01, + 0x7c6cdbc0b2917ab6, + 0xace7cfbe857082f7, + 0x4d2845aba2d9a1e0, + 0xbb537fe0447070de, + 0xcba744436dd557df, + 0xd3b5a3473600dbcb, + 0x4aeabbe6f9ffd7f8, + // 2^156 * 8 * G + 0x4630119e40d8f78c, + 0xa01a9bc53c710e11, + 0x486d2b258910dd79, + 0x1e6c47b3db0324e5, + 0x6a2134bcc4a9c8f2, + 0xfbf8fd1c8ace2e37, + 0x000ae3049911a0ba, + 0x046e3a616bc89b9e, + 0x14e65442f03906be, + 0x4a019d54e362be2a, + 0x68ccdfec8dc230c7, + 0x7cfb7e3faf6b861c, + // 2^160 * 1 * G + 0x4637974e8c58aedc, + 0xb9ef22fbabf041a4, + 0xe185d956e980718a, + 0x2f1b78fab143a8a6, + 0x96eebffb305b2f51, + 0xd3f938ad889596b8, + 0xf0f52dc746d5dd25, + 0x57968290bb3a0095, + 0xf71ab8430a20e101, + 0xf393658d24f0ec47, + 0xcf7509a86ee2eed1, + 0x7dc43e35dc2aa3e1, + // 2^160 * 2 * G + 0x85966665887dd9c3, + 0xc90f9b314bb05355, + 
0xc6e08df8ef2079b1, + 0x7ef72016758cc12f, + 0x5a782a5c273e9718, + 0x3576c6995e4efd94, + 0x0f2ed8051f237d3e, + 0x044fb81d82d50a99, + 0xc1df18c5a907e3d9, + 0x57b3371dce4c6359, + 0xca704534b201bb49, + 0x7f79823f9c30dd2e, + // 2^160 * 3 * G + 0x8334d239a3b513e8, + 0xc13670d4b91fa8d8, + 0x12b54136f590bd33, + 0x0a4e0373d784d9b4, + 0x6a9c1ff068f587ba, + 0x0827894e0050c8de, + 0x3cbf99557ded5be7, + 0x64a9b0431c06d6f0, + 0x2eb3d6a15b7d2919, + 0xb0b4f6a0d53a8235, + 0x7156ce4389a45d47, + 0x071a7d0ace18346c, + // 2^160 * 4 * G + 0xd3072daac887ba0b, + 0x01262905bfa562ee, + 0xcf543002c0ef768b, + 0x2c3bcc7146ea7e9c, + 0xcc0c355220e14431, + 0x0d65950709b15141, + 0x9af5621b209d5f36, + 0x7c69bcf7617755d3, + 0x07f0d7eb04e8295f, + 0x10db18252f50f37d, + 0xe951a9a3171798d7, + 0x6f5a9a7322aca51d, + // 2^160 * 5 * G + 0x8ba1000c2f41c6c5, + 0xc49f79c10cfefb9b, + 0x4efa47703cc51c9f, + 0x494e21a2e147afca, + 0xe729d4eba3d944be, + 0x8d9e09408078af9e, + 0x4525567a47869c03, + 0x02ab9680ee8d3b24, + 0xefa48a85dde50d9a, + 0x219a224e0fb9a249, + 0xfa091f1dd91ef6d9, + 0x6b5d76cbea46bb34, + // 2^160 * 6 * G + 0x8857556cec0cd994, + 0x6472dc6f5cd01dba, + 0xaf0169148f42b477, + 0x0ae333f685277354, + 0xe0f941171e782522, + 0xf1e6ae74036936d3, + 0x408b3ea2d0fcc746, + 0x16fb869c03dd313e, + 0x288e199733b60962, + 0x24fc72b4d8abe133, + 0x4811f7ed0991d03e, + 0x3f81e38b8f70d075, + // 2^160 * 7 * G + 0x7f910fcc7ed9affe, + 0x545cb8a12465874b, + 0xa8397ed24b0c4704, + 0x50510fc104f50993, + 0x0adb7f355f17c824, + 0x74b923c3d74299a4, + 0xd57c3e8bcbf8eaf7, + 0x0ad3e2d34cdedc3d, + 0x6f0c0fc5336e249d, + 0x745ede19c331cfd9, + 0xf2d6fd0009eefe1c, + 0x127c158bf0fa1ebe, + // 2^160 * 8 * G + 0xf6197c422e9879a2, + 0xa44addd452ca3647, + 0x9b413fc14b4eaccb, + 0x354ef87d07ef4f68, + 0xdea28fc4ae51b974, + 0x1d9973d3744dfe96, + 0x6240680b873848a8, + 0x4ed82479d167df95, + 0xfee3b52260c5d975, + 0x50352efceb41b0b8, + 0x8808ac30a9f6653c, + 0x302d92d20539236d, + // 2^164 * 1 * G + 0x4c59023fcb3efb7c, + 0x6c2fcb99c63c2a94, + 
0xba4190e2c3c7e084, + 0x0e545daea51874d9, + 0x957b8b8b0df53c30, + 0x2a1c770a8e60f098, + 0xbbc7a670345796de, + 0x22a48f9a90c99bc9, + 0x6b7dc0dc8d3fac58, + 0x5497cd6ce6e42bfd, + 0x542f7d1bf400d305, + 0x4159f47f048d9136, + // 2^164 * 2 * G + 0x20ad660839e31e32, + 0xf81e1bd58405be50, + 0xf8064056f4dabc69, + 0x14d23dd4ce71b975, + 0x748515a8bbd24839, + 0x77128347afb02b55, + 0x50ba2ac649a2a17f, + 0x060525513ad730f1, + 0xf2398e098aa27f82, + 0x6d7982bb89a1b024, + 0xfa694084214dd24c, + 0x71ab966fa32301c3, + // 2^164 * 3 * G + 0x2dcbd8e34ded02fc, + 0x1151f3ec596f22aa, + 0xbca255434e0328da, + 0x35768fbe92411b22, + 0xb1088a0702809955, + 0x43b273ea0b43c391, + 0xca9b67aefe0686ed, + 0x605eecbf8335f4ed, + 0x83200a656c340431, + 0x9fcd71678ee59c2f, + 0x75d4613f71300f8a, + 0x7a912faf60f542f9, + // 2^164 * 4 * G + 0xb204585e5edc1a43, + 0x9f0e16ee5897c73c, + 0x5b82c0ae4e70483c, + 0x624a170e2bddf9be, + 0x253f4f8dfa2d5597, + 0x25e49c405477130c, + 0x00c052e5996b1102, + 0x33cb966e33bb6c4a, + 0x597028047f116909, + 0x828ac41c1e564467, + 0x70417dbde6217387, + 0x721627aefbac4384, + // 2^164 * 5 * G + 0x97d03bc38736add5, + 0x2f1422afc532b130, + 0x3aa68a057101bbc4, + 0x4c946cf7e74f9fa7, + 0xfd3097bc410b2f22, + 0xf1a05da7b5cfa844, + 0x61289a1def57ca74, + 0x245ea199bb821902, + 0xaedca66978d477f8, + 0x1898ba3c29117fe1, + 0xcf73f983720cbd58, + 0x67da12e6b8b56351, + // 2^164 * 6 * G + 0x7067e187b4bd6e07, + 0x6e8f0203c7d1fe74, + 0x93c6aa2f38c85a30, + 0x76297d1f3d75a78a, + 0x2b7ef3d38ec8308c, + 0x828fd7ec71eb94ab, + 0x807c3b36c5062abd, + 0x0cb64cb831a94141, + 0x3030fc33534c6378, + 0xb9635c5ce541e861, + 0x15d9a9bed9b2c728, + 0x49233ea3f3775dcb, + // 2^164 * 7 * G + 0x629398fa8dbffc3a, + 0xe12fe52dd54db455, + 0xf3be11dfdaf25295, + 0x628b140dce5e7b51, + 0x7b3985fe1c9f249b, + 0x4fd6b2d5a1233293, + 0xceb345941adf4d62, + 0x6987ff6f542de50c, + 0x47e241428f83753c, + 0x6317bebc866af997, + 0xdabb5b433d1a9829, + 0x074d8d245287fb2d, + // 2^164 * 8 * G + 0x8337d9cd440bfc31, + 0x729d2ca1af318fd7, + 
0xa040a4a4772c2070, + 0x46002ef03a7349be, + 0x481875c6c0e31488, + 0x219429b2e22034b4, + 0x7223c98a31283b65, + 0x3420d60b342277f9, + 0xfaa23adeaffe65f7, + 0x78261ed45be0764c, + 0x441c0a1e2f164403, + 0x5aea8e567a87d395, + // 2^168 * 1 * G + 0x7813c1a2bca4283d, + 0xed62f091a1863dd9, + 0xaec7bcb8c268fa86, + 0x10e5d3b76f1cae4c, + 0x2dbc6fb6e4e0f177, + 0x04e1bf29a4bd6a93, + 0x5e1966d4787af6e8, + 0x0edc5f5eb426d060, + 0x5453bfd653da8e67, + 0xe9dc1eec24a9f641, + 0xbf87263b03578a23, + 0x45b46c51361cba72, + // 2^168 * 2 * G + 0xa9402abf314f7fa1, + 0xe257f1dc8e8cf450, + 0x1dbbd54b23a8be84, + 0x2177bfa36dcb713b, + 0xce9d4ddd8a7fe3e4, + 0xab13645676620e30, + 0x4b594f7bb30e9958, + 0x5c1c0aef321229df, + 0x37081bbcfa79db8f, + 0x6048811ec25f59b3, + 0x087a76659c832487, + 0x4ae619387d8ab5bb, + // 2^168 * 3 * G + 0x8ddbf6aa5344a32e, + 0x7d88eab4b41b4078, + 0x5eb0eb974a130d60, + 0x1a00d91b17bf3e03, + 0x61117e44985bfb83, + 0xfce0462a71963136, + 0x83ac3448d425904b, + 0x75685abe5ba43d64, + 0x6e960933eb61f2b2, + 0x543d0fa8c9ff4952, + 0xdf7275107af66569, + 0x135529b623b0e6aa, + // 2^168 * 4 * G + 0x18f0dbd7add1d518, + 0x979f7888cfc11f11, + 0x8732e1f07114759b, + 0x79b5b81a65ca3a01, + 0xf5c716bce22e83fe, + 0xb42beb19e80985c1, + 0xec9da63714254aae, + 0x5972ea051590a613, + 0x0fd4ac20dc8f7811, + 0x9a9ad294ac4d4fa8, + 0xc01b2d64b3360434, + 0x4f7e9c95905f3bdb, + // 2^168 * 5 * G + 0x62674bbc5781302e, + 0xd8520f3989addc0f, + 0x8c2999ae53fbd9c6, + 0x31993ad92e638e4c, + 0x71c8443d355299fe, + 0x8bcd3b1cdbebead7, + 0x8092499ef1a49466, + 0x1942eec4a144adc8, + 0x7dac5319ae234992, + 0x2c1b3d910cea3e92, + 0x553ce494253c1122, + 0x2a0a65314ef9ca75, + // 2^168 * 6 * G + 0x2db7937ff7f927c2, + 0xdb741f0617d0a635, + 0x5982f3a21155af76, + 0x4cf6e218647c2ded, + 0xcf361acd3c1c793a, + 0x2f9ebcac5a35bc3b, + 0x60e860e9a8cda6ab, + 0x055dc39b6dea1a13, + 0xb119227cc28d5bb6, + 0x07e24ebc774dffab, + 0xa83c78cee4a32c89, + 0x121a307710aa24b6, + // 2^168 * 7 * G + 0xe4db5d5e9f034a97, + 0xe153fc093034bc2d, + 
0x460546919551d3b1, + 0x333fc76c7a40e52d, + 0xd659713ec77483c9, + 0x88bfe077b82b96af, + 0x289e28231097bcd3, + 0x527bb94a6ced3a9b, + 0x563d992a995b482e, + 0x3405d07c6e383801, + 0x485035de2f64d8e5, + 0x6b89069b20a7a9f7, + // 2^168 * 8 * G + 0x812aa0416270220d, + 0x995a89faf9245b4e, + 0xffadc4ce5072ef05, + 0x23bc2103aa73eb73, + 0x4082fa8cb5c7db77, + 0x068686f8c734c155, + 0x29e6c8d9f6e7a57e, + 0x0473d308a7639bcf, + 0xcaee792603589e05, + 0x2b4b421246dcc492, + 0x02a1ef74e601a94f, + 0x102f73bfde04341a, + // 2^172 * 1 * G + 0xb5a2d50c7ec20d3e, + 0xc64bdd6ea0c97263, + 0x56e89052c1ff734d, + 0x4929c6f72b2ffaba, + 0x358ecba293a36247, + 0xaf8f9862b268fd65, + 0x412f7e9968a01c89, + 0x5786f312cd754524, + 0x337788ffca14032c, + 0xf3921028447f1ee3, + 0x8b14071f231bccad, + 0x4c817b4bf2344783, + // 2^172 * 2 * G + 0x0ff853852871b96e, + 0xe13e9fab60c3f1bb, + 0xeefd595325344402, + 0x0a37c37075b7744b, + 0x413ba057a40b4484, + 0xba4c2e1a4f5f6a43, + 0x614ba0a5aee1d61c, + 0x78a1531a8b05dc53, + 0x6cbdf1703ad0562b, + 0x8ecf4830c92521a3, + 0xdaebd303fd8424e7, + 0x72ad82a42e5ec56f, + // 2^172 * 3 * G + 0x3f9e8e35bafb65f6, + 0x39d69ec8f27293a1, + 0x6cb8cd958cf6a3d0, + 0x1734778173adae6d, + 0xc368939167024bc3, + 0x8e69d16d49502fda, + 0xfcf2ec3ce45f4b29, + 0x065f669ea3b4cbc4, + 0x8a00aec75532db4d, + 0xb869a4e443e31bb1, + 0x4a0f8552d3a7f515, + 0x19adeb7c303d7c08, + // 2^172 * 4 * G + 0xc720cb6153ead9a3, + 0x55b2c97f512b636e, + 0xb1e35b5fd40290b1, + 0x2fd9ccf13b530ee2, + 0x9d05ba7d43c31794, + 0x2470c8ff93322526, + 0x8323dec816197438, + 0x2852709881569b53, + 0x07bd475b47f796b8, + 0xd2c7b013542c8f54, + 0x2dbd23f43b24f87e, + 0x6551afd77b0901d6, + // 2^172 * 5 * G + 0x4546baaf54aac27f, + 0xf6f66fecb2a45a28, + 0x582d1b5b562bcfe8, + 0x44b123f3920f785f, + 0x68a24ce3a1d5c9ac, + 0xbb77a33d10ff6461, + 0x0f86ce4425d3166e, + 0x56507c0950b9623b, + 0x1206f0b7d1713e63, + 0x353fe3d915bafc74, + 0x194ceb970ad9d94d, + 0x62fadd7cf9d03ad3, + // 2^172 * 6 * G + 0xc6b5967b5598a074, + 0x5efe91ce8e493e25, + 
0xd4b72c4549280888, + 0x20ef1149a26740c2, + 0x3cd7bc61e7ce4594, + 0xcd6b35a9b7dd267e, + 0xa080abc84366ef27, + 0x6ec7c46f59c79711, + 0x2f07ad636f09a8a2, + 0x8697e6ce24205e7d, + 0xc0aefc05ee35a139, + 0x15e80958b5f9d897, + // 2^172 * 7 * G + 0x25a5ef7d0c3e235b, + 0x6c39c17fbe134ee7, + 0xc774e1342dc5c327, + 0x021354b892021f39, + 0x4dd1ed355bb061c4, + 0x42dc0cef941c0700, + 0x61305dc1fd86340e, + 0x56b2cc930e55a443, + 0x1df79da6a6bfc5a2, + 0x02f3a2749fde4369, + 0xb323d9f2cda390a7, + 0x7be0847b8774d363, + // 2^172 * 8 * G + 0x8c99cc5a8b3f55c3, + 0x0611d7253fded2a0, + 0xed2995ff36b70a36, + 0x1f699a54d78a2619, + 0x1466f5af5307fa11, + 0x817fcc7ded6c0af2, + 0x0a6de44ec3a4a3fb, + 0x74071475bc927d0b, + 0xe77292f373e7ea8a, + 0x296537d2cb045a31, + 0x1bd0653ed3274fde, + 0x2f9a2c4476bd2966, + // 2^176 * 1 * G + 0xeb18b9ab7f5745c6, + 0x023a8aee5787c690, + 0xb72712da2df7afa9, + 0x36597d25ea5c013d, + 0xa2b4dae0b5511c9a, + 0x7ac860292bffff06, + 0x981f375df5504234, + 0x3f6bd725da4ea12d, + 0x734d8d7b106058ac, + 0xd940579e6fc6905f, + 0x6466f8f99202932d, + 0x7b7ecc19da60d6d0, + // 2^176 * 2 * G + 0x78c2373c695c690d, + 0xdd252e660642906e, + 0x951d44444ae12bd2, + 0x4235ad7601743956, + 0x6dae4a51a77cfa9b, + 0x82263654e7a38650, + 0x09bbffcd8f2d82db, + 0x03bedc661bf5caba, + 0x6258cb0d078975f5, + 0x492942549189f298, + 0xa0cab423e2e36ee4, + 0x0e7ce2b0cdf066a1, + // 2^176 * 3 * G + 0xc494643ac48c85a3, + 0xfd361df43c6139ad, + 0x09db17dd3ae94d48, + 0x666e0a5d8fb4674a, + 0xfea6fedfd94b70f9, + 0xf130c051c1fcba2d, + 0x4882d47e7f2fab89, + 0x615256138aeceeb5, + 0x2abbf64e4870cb0d, + 0xcd65bcf0aa458b6b, + 0x9abe4eba75e8985d, + 0x7f0bc810d514dee4, + // 2^176 * 4 * G + 0xb9006ba426f4136f, + 0x8d67369e57e03035, + 0xcbc8dfd94f463c28, + 0x0d1f8dbcf8eedbf5, + 0x83ac9dad737213a0, + 0x9ff6f8ba2ef72e98, + 0x311e2edd43ec6957, + 0x1d3a907ddec5ab75, + 0xba1693313ed081dc, + 0x29329fad851b3480, + 0x0128013c030321cb, + 0x00011b44a31bfde3, + // 2^176 * 5 * G + 0x3fdfa06c3fc66c0c, + 0x5d40e38e4dd60dd2, + 
0x7ae38b38268e4d71, + 0x3ac48d916e8357e1, + 0x16561f696a0aa75c, + 0xc1bf725c5852bd6a, + 0x11a8dd7f9a7966ad, + 0x63d988a2d2851026, + 0x00120753afbd232e, + 0xe92bceb8fdd8f683, + 0xf81669b384e72b91, + 0x33fad52b2368a066, + // 2^176 * 6 * G + 0x540649c6c5e41e16, + 0x0af86430333f7735, + 0xb2acfcd2f305e746, + 0x16c0f429a256dca7, + 0x8d2cc8d0c422cfe8, + 0x072b4f7b05a13acb, + 0xa3feb6e6ecf6a56f, + 0x3cc355ccb90a71e2, + 0xe9b69443903e9131, + 0xb8a494cb7a5637ce, + 0xc87cd1a4baba9244, + 0x631eaf426bae7568, + // 2^176 * 7 * G + 0xb3e90410da66fe9f, + 0x85dd4b526c16e5a6, + 0xbc3d97611ef9bf83, + 0x5599648b1ea919b5, + 0x47d975b9a3700de8, + 0x7280c5fbe2f80552, + 0x53658f2732e45de1, + 0x431f2c7f665f80b5, + 0xd6026344858f7b19, + 0x14ab352fa1ea514a, + 0x8900441a2090a9d7, + 0x7b04715f91253b26, + // 2^176 * 8 * G + 0x83edbd28acf6ae43, + 0x86357c8b7d5c7ab4, + 0xc0404769b7eb2c44, + 0x59b37bf5c2f6583f, + 0xb376c280c4e6bac6, + 0x970ed3dd6d1d9b0b, + 0xb09a9558450bf944, + 0x48d0acfa57cde223, + 0xb60f26e47dabe671, + 0xf1d1a197622f3a37, + 0x4208ce7ee9960394, + 0x16234191336d3bdb, + // 2^180 * 1 * G + 0xf19aeac733a63aef, + 0x2c7fba5d4442454e, + 0x5da87aa04795e441, + 0x413051e1a4e0b0f5, + 0x852dd1fd3d578bbe, + 0x2b65ce72c3286108, + 0x658c07f4eace2273, + 0x0933f804ec38ab40, + 0xa7ab69798d496476, + 0x8121aadefcb5abc8, + 0xa5dc12ef7b539472, + 0x07fd47065e45351a, + // 2^180 * 2 * G + 0xc8583c3d258d2bcd, + 0x17029a4daf60b73f, + 0xfa0fc9d6416a3781, + 0x1c1e5fba38b3fb23, + 0x304211559ae8e7c3, + 0xf281b229944882a5, + 0x8a13ac2e378250e4, + 0x014afa0954ba48f4, + 0xcb3197001bb3666c, + 0x330060524bffecb9, + 0x293711991a88233c, + 0x291884363d4ed364, + // 2^180 * 3 * G + 0x033c6805dc4babfa, + 0x2c15bf5e5596ecc1, + 0x1bc70624b59b1d3b, + 0x3ede9850a19f0ec5, + 0xfb9d37c3bc1ab6eb, + 0x02be14534d57a240, + 0xf4d73415f8a5e1f6, + 0x5964f4300ccc8188, + 0xe44a23152d096800, + 0x5c08c55970866996, + 0xdf2db60a46affb6e, + 0x579155c1f856fd89, + // 2^180 * 4 * G + 0x96324edd12e0c9ef, + 0x468b878df2420297, + 
0x199a3776a4f573be, + 0x1e7fbcf18e91e92a, + 0xb5f16b630817e7a6, + 0x808c69233c351026, + 0x324a983b54cef201, + 0x53c092084a485345, + 0xd2d41481f1cbafbf, + 0x231d2db6716174e5, + 0x0b7d7656e2a55c98, + 0x3e955cd82aa495f6, + // 2^180 * 5 * G + 0xe48f535e3ed15433, + 0xd075692a0d7270a3, + 0x40fbd21daade6387, + 0x14264887cf4495f5, + 0xab39f3ef61bb3a3f, + 0x8eb400652eb9193e, + 0xb5de6ecc38c11f74, + 0x654d7e9626f3c49f, + 0xe564cfdd5c7d2ceb, + 0x82eeafded737ccb9, + 0x6107db62d1f9b0ab, + 0x0b6baac3b4358dbb, + // 2^180 * 6 * G + 0x7ae62bcb8622fe98, + 0x47762256ceb891af, + 0x1a5a92bcf2e406b4, + 0x7d29401784e41501, + 0x204abad63700a93b, + 0xbe0023d3da779373, + 0xd85f0346633ab709, + 0x00496dc490820412, + 0x1c74b88dc27e6360, + 0x074854268d14850c, + 0xa145fb7b3e0dcb30, + 0x10843f1b43803b23, + // 2^180 * 7 * G + 0xc5f90455376276dd, + 0xce59158dd7645cd9, + 0x92f65d511d366b39, + 0x11574b6e526996c4, + 0xd56f672de324689b, + 0xd1da8aedb394a981, + 0xdd7b58fe9168cfed, + 0x7ce246cd4d56c1e8, + 0xb8f4308e7f80be53, + 0x5f3cb8cb34a9d397, + 0x18a961bd33cc2b2c, + 0x710045fb3a9af671, + // 2^180 * 8 * G + 0x73f93d36101b95eb, + 0xfaef33794f6f4486, + 0x5651735f8f15e562, + 0x7fa3f19058b40da1, + 0xa03fc862059d699e, + 0x2370cfa19a619e69, + 0xc4fe3b122f823deb, + 0x1d1b056fa7f0844e, + 0x1bc64631e56bf61f, + 0xd379ab106e5382a3, + 0x4d58c57e0540168d, + 0x566256628442d8e4, + // 2^184 * 1 * G + 0xb9e499def6267ff6, + 0x7772ca7b742c0843, + 0x23a0153fe9a4f2b1, + 0x2cdfdfecd5d05006, + 0xdd499cd61ff38640, + 0x29cd9bc3063625a0, + 0x51e2d8023dd73dc3, + 0x4a25707a203b9231, + 0x2ab7668a53f6ed6a, + 0x304242581dd170a1, + 0x4000144c3ae20161, + 0x5721896d248e49fc, + // 2^184 * 2 * G + 0x0b6e5517fd181bae, + 0x9022629f2bb963b4, + 0x5509bce932064625, + 0x578edd74f63c13da, + 0x285d5091a1d0da4e, + 0x4baa6fa7b5fe3e08, + 0x63e5177ce19393b3, + 0x03c935afc4b030fd, + 0x997276c6492b0c3d, + 0x47ccc2c4dfe205fc, + 0xdcd29b84dd623a3c, + 0x3ec2ab590288c7a2, + // 2^184 * 3 * G + 0xa1a0d27be4d87bb9, + 0xa98b4deb61391aed, + 
0x99a0ddd073cb9b83, + 0x2dd5c25a200fcace, + 0xa7213a09ae32d1cb, + 0x0f2b87df40f5c2d5, + 0x0baea4c6e81eab29, + 0x0e1bf66c6adbac5e, + 0xe2abd5e9792c887e, + 0x1a020018cb926d5d, + 0xbfba69cdbaae5f1e, + 0x730548b35ae88f5f, + // 2^184 * 4 * G + 0xc43551a3cba8b8ee, + 0x65a26f1db2115f16, + 0x760f4f52ab8c3850, + 0x3043443b411db8ca, + 0x805b094ba1d6e334, + 0xbf3ef17709353f19, + 0x423f06cb0622702b, + 0x585a2277d87845dd, + 0xa18a5f8233d48962, + 0x6698c4b5ec78257f, + 0xa78e6fa5373e41ff, + 0x7656278950ef981f, + // 2^184 * 5 * G + 0x38c3cf59d51fc8c0, + 0x9bedd2fd0506b6f2, + 0x26bf109fab570e8f, + 0x3f4160a8c1b846a6, + 0xe17073a3ea86cf9d, + 0x3a8cfbb707155fdc, + 0x4853e7fc31838a8e, + 0x28bbf484b613f616, + 0xf2612f5c6f136c7c, + 0xafead107f6dd11be, + 0x527e9ad213de6f33, + 0x1e79cb358188f75d, + // 2^184 * 6 * G + 0x013436c3eef7e3f1, + 0x828b6a7ffe9e10f8, + 0x7ff908e5bcf9defc, + 0x65d7951b3a3b3831, + 0x77e953d8f5e08181, + 0x84a50c44299dded9, + 0xdc6c2d0c864525e5, + 0x478ab52d39d1f2f4, + 0x66a6a4d39252d159, + 0xe5dde1bc871ac807, + 0xb82c6b40a6c1c96f, + 0x16d87a411a212214, + // 2^184 * 7 * G + 0xb3bd7e5a42066215, + 0x879be3cd0c5a24c1, + 0x57c05db1d6f994b7, + 0x28f87c8165f38ca6, + 0xfba4d5e2d54e0583, + 0xe21fafd72ebd99fa, + 0x497ac2736ee9778f, + 0x1f990b577a5a6dde, + 0xa3344ead1be8f7d6, + 0x7d1e50ebacea798f, + 0x77c6569e520de052, + 0x45882fe1534d6d3e, + // 2^184 * 8 * G + 0x6669345d757983d6, + 0x62b6ed1117aa11a6, + 0x7ddd1857985e128f, + 0x688fe5b8f626f6dd, + 0xd8ac9929943c6fe4, + 0xb5f9f161a38392a2, + 0x2699db13bec89af3, + 0x7dcf843ce405f074, + 0x6c90d6484a4732c0, + 0xd52143fdca563299, + 0xb3be28c3915dc6e1, + 0x6739687e7327191b, + // 2^188 * 1 * G + 0x9f65c5ea200814cf, + 0x840536e169a31740, + 0x8b0ed13925c8b4ad, + 0x0080dbafe936361d, + 0x8ce5aad0c9cb971f, + 0x1156aaa99fd54a29, + 0x41f7247015af9b78, + 0x1fe8cca8420f49aa, + 0x72a1848f3c0cc82a, + 0x38c560c2877c9e54, + 0x5004e228ce554140, + 0x042418a103429d71, + // 2^188 * 2 * G + 0x899dea51abf3ff5f, + 0x9b93a8672fc2d8ba, + 
0x2c38cb97be6ebd5c, + 0x114d578497263b5d, + 0x58e84c6f20816247, + 0x8db2b2b6e36fd793, + 0x977182561d484d85, + 0x0822024f8632abd7, + 0xb301bb7c6b1beca3, + 0x55393f6dc6eb1375, + 0x910d281097b6e4eb, + 0x1ad4548d9d479ea3, + // 2^188 * 3 * G + 0xcd5a7da0389a48fd, + 0xb38fa4aa9a78371e, + 0xc6d9761b2cdb8e6c, + 0x35cf51dbc97e1443, + 0xa06fe66d0fe9fed3, + 0xa8733a401c587909, + 0x30d14d800df98953, + 0x41ce5876c7b30258, + 0x59ac3bc5d670c022, + 0xeae67c109b119406, + 0x9798bdf0b3782fda, + 0x651e3201fd074092, + // 2^188 * 4 * G + 0xd63d8483ef30c5cf, + 0x4cd4b4962361cc0c, + 0xee90e500a48426ac, + 0x0af51d7d18c14eeb, + 0xa57ba4a01efcae9e, + 0x769f4beedc308a94, + 0xd1f10eeb3603cb2e, + 0x4099ce5e7e441278, + 0x1ac98e4f8a5121e9, + 0x7dae9544dbfa2fe0, + 0x8320aa0dd6430df9, + 0x667282652c4a2fb5, + // 2^188 * 5 * G + 0x874621f4d86bc9ab, + 0xb54c7bbe56fe6fea, + 0x077a24257fadc22c, + 0x1ab53be419b90d39, + 0xada8b6e02946db23, + 0x1c0ce51a7b253ab7, + 0x8448c85a66dd485b, + 0x7f1fc025d0675adf, + 0xd8ee1b18319ea6aa, + 0x004d88083a21f0da, + 0x3bd6aa1d883a4f4b, + 0x4db9a3a6dfd9fd14, + // 2^188 * 6 * G + 0x8ce7b23bb99c0755, + 0x35c5d6edc4f50f7a, + 0x7e1e2ed2ed9b50c3, + 0x36305f16e8934da1, + 0xd95b00bbcbb77c68, + 0xddbc846a91f17849, + 0x7cf700aebe28d9b3, + 0x5ce1285c85d31f3e, + 0x31b6972d98b0bde8, + 0x7d920706aca6de5b, + 0xe67310f8908a659f, + 0x50fac2a6efdf0235, + // 2^188 * 7 * G + 0xf3d3a9f35b880f5a, + 0xedec050cdb03e7c2, + 0xa896981ff9f0b1a2, + 0x49a4ae2bac5e34a4, + 0x295b1c86f6f449bc, + 0x51b2e84a1f0ab4dd, + 0xc001cb30aa8e551d, + 0x6a28d35944f43662, + 0x28bb12ee04a740e0, + 0x14313bbd9bce8174, + 0x72f5b5e4e8c10c40, + 0x7cbfb19936adcd5b, + // 2^188 * 8 * G + 0xa311ddc26b89792d, + 0x1b30b4c6da512664, + 0x0ca77b4ccf150859, + 0x1de443df1b009408, + 0x8e793a7acc36e6e0, + 0xf9fab7a37d586eed, + 0x3a4f9692bae1f4e4, + 0x1c14b03eff5f447e, + 0x19647bd114a85291, + 0x57b76cb21034d3af, + 0x6329db440f9d6dfa, + 0x5ef43e586a571493, + // 2^192 * 1 * G + 0xef782014385675a6, + 0xa2649f30aafda9e8, + 
0x4cd1eb505cdfa8cb, + 0x46115aba1d4dc0b3, + 0xa66dcc9dc80c1ac0, + 0x97a05cf41b38a436, + 0xa7ebf3be95dbd7c6, + 0x7da0b8f68d7e7dab, + 0xd40f1953c3b5da76, + 0x1dac6f7321119e9b, + 0x03cc6021feb25960, + 0x5a5f887e83674b4b, + // 2^192 * 2 * G + 0x8f6301cf70a13d11, + 0xcfceb815350dd0c4, + 0xf70297d4a4bca47e, + 0x3669b656e44d1434, + 0x9e9628d3a0a643b9, + 0xb5c3cb00e6c32064, + 0x9b5302897c2dec32, + 0x43e37ae2d5d1c70c, + 0x387e3f06eda6e133, + 0x67301d5199a13ac0, + 0xbd5ad8f836263811, + 0x6a21e6cd4fd5e9be, + // 2^192 * 3 * G + 0xf1c6170a3046e65f, + 0x58712a2a00d23524, + 0x69dbbd3c8c82b755, + 0x586bf9f1a195ff57, + 0xef4129126699b2e3, + 0x71d30847708d1301, + 0x325432d01182b0bd, + 0x45371b07001e8b36, + 0xa6db088d5ef8790b, + 0x5278f0dc610937e5, + 0xac0349d261a16eb8, + 0x0eafb03790e52179, + // 2^192 * 4 * G + 0x960555c13748042f, + 0x219a41e6820baa11, + 0x1c81f73873486d0c, + 0x309acc675a02c661, + 0x5140805e0f75ae1d, + 0xec02fbe32662cc30, + 0x2cebdf1eea92396d, + 0x44ae3344c5435bb3, + 0x9cf289b9bba543ee, + 0xf3760e9d5ac97142, + 0x1d82e5c64f9360aa, + 0x62d5221b7f94678f, + // 2^192 * 5 * G + 0x524c299c18d0936d, + 0xc86bb56c8a0c1a0c, + 0xa375052edb4a8631, + 0x5c0efde4bc754562, + 0x7585d4263af77a3c, + 0xdfae7b11fee9144d, + 0xa506708059f7193d, + 0x14f29a5383922037, + 0xdf717edc25b2d7f5, + 0x21f970db99b53040, + 0xda9234b7c3ed4c62, + 0x5e72365c7bee093e, + // 2^192 * 6 * G + 0x575bfc074571217f, + 0x3779675d0694d95b, + 0x9a0a37bbf4191e33, + 0x77f1104c47b4eabc, + 0x7d9339062f08b33e, + 0x5b9659e5df9f32be, + 0xacff3dad1f9ebdfd, + 0x70b20555cb7349b7, + 0xbe5113c555112c4c, + 0x6688423a9a881fcd, + 0x446677855e503b47, + 0x0e34398f4a06404a, + // 2^192 * 7 * G + 0xb67d22d93ecebde8, + 0x09b3e84127822f07, + 0x743fa61fb05b6d8d, + 0x5e5405368a362372, + 0x18930b093e4b1928, + 0x7de3e10e73f3f640, + 0xf43217da73395d6f, + 0x6f8aded6ca379c3e, + 0xe340123dfdb7b29a, + 0x487b97e1a21ab291, + 0xf9967d02fde6949e, + 0x780de72ec8d3de97, + // 2^192 * 8 * G + 0x0ae28545089ae7bc, + 0x388ddecf1c7f4d06, + 
0x38ac15510a4811b8, + 0x0eb28bf671928ce4, + 0x671feaf300f42772, + 0x8f72eb2a2a8c41aa, + 0x29a17fd797373292, + 0x1defc6ad32b587a6, + 0xaf5bbe1aef5195a7, + 0x148c1277917b15ed, + 0x2991f7fb7ae5da2e, + 0x467d201bf8dd2867, + // 2^196 * 1 * G + 0x7906ee72f7bd2e6b, + 0x05d270d6109abf4e, + 0x8d5cfe45b941a8a4, + 0x44c218671c974287, + 0x745f9d56296bc318, + 0x993580d4d8152e65, + 0xb0e5b13f5839e9ce, + 0x51fc2b28d43921c0, + 0x1b8fd11795e2a98c, + 0x1c4e5ee12b6b6291, + 0x5b30e7107424b572, + 0x6e6b9de84c4f4ac6, + // 2^196 * 2 * G + 0xdff25fce4b1de151, + 0xd841c0c7e11c4025, + 0x2554b3c854749c87, + 0x2d292459908e0df9, + 0x6b7c5f10f80cb088, + 0x736b54dc56e42151, + 0xc2b620a5c6ef99c4, + 0x5f4c802cc3a06f42, + 0x9b65c8f17d0752da, + 0x881ce338c77ee800, + 0xc3b514f05b62f9e3, + 0x66ed5dd5bec10d48, + // 2^196 * 3 * G + 0x7d38a1c20bb2089d, + 0x808334e196ccd412, + 0xc4a70b8c6c97d313, + 0x2eacf8bc03007f20, + 0xf0adf3c9cbca047d, + 0x81c3b2cbf4552f6b, + 0xcfda112d44735f93, + 0x1f23a0c77e20048c, + 0xf235467be5bc1570, + 0x03d2d9020dbab38c, + 0x27529aa2fcf9e09e, + 0x0840bef29d34bc50, + // 2^196 * 4 * G + 0x796dfb35dc10b287, + 0x27176bcd5c7ff29d, + 0x7f3d43e8c7b24905, + 0x0304f5a191c54276, + 0xcd54e06b7f37e4eb, + 0x8cc15f87f5e96cca, + 0xb8248bb0d3597dce, + 0x246affa06074400c, + 0x37d88e68fbe45321, + 0x86097548c0d75032, + 0x4e9b13ef894a0d35, + 0x25a83cac5753d325, + // 2^196 * 5 * G + 0x10222f48eed8165e, + 0x623fc1234b8bcf3a, + 0x1e145c09c221e8f0, + 0x7ccfa59fca782630, + 0x9f0f66293952b6e2, + 0x33db5e0e0934267b, + 0xff45252bd609fedc, + 0x06be10f5c506e0c9, + 0x1a9615a9b62a345f, + 0x22050c564a52fecc, + 0xa7a2788528bc0dfe, + 0x5e82770a1a1ee71d, + // 2^196 * 6 * G + 0x35425183ad896a5c, + 0xe8673afbe78d52f6, + 0x2c66f25f92a35f64, + 0x09d04f3b3b86b102, + 0xe802e80a42339c74, + 0x34175166a7fffae5, + 0x34865d1f1c408cae, + 0x2cca982c605bc5ee, + 0xfd2d5d35197dbe6e, + 0x207c2eea8be4ffa3, + 0x2613d8db325ae918, + 0x7a325d1727741d3e, + // 2^196 * 7 * G + 0xd036b9bbd16dfde2, + 0xa2055757c497a829, + 
0x8e6cc966a7f12667, + 0x4d3b1a791239c180, + 0xecd27d017e2a076a, + 0xd788689f1636495e, + 0x52a61af0919233e5, + 0x2a479df17bb1ae64, + 0x9e5eee8e33db2710, + 0x189854ded6c43ca5, + 0xa41c22c592718138, + 0x27ad5538a43a5e9b, + // 2^196 * 8 * G + 0x2746dd4b15350d61, + 0xd03fcbc8ee9521b7, + 0xe86e365a138672ca, + 0x510e987f7e7d89e2, + 0xcb5a7d638e47077c, + 0x8db7536120a1c059, + 0x549e1e4d8bedfdcc, + 0x080153b7503b179d, + 0xdda69d930a3ed3e3, + 0x3d386ef1cd60a722, + 0xc817ad58bdaa4ee6, + 0x23be8d554fe7372a, + // 2^200 * 1 * G + 0x95fe919a74ef4fad, + 0x3a827becf6a308a2, + 0x964e01d309a47b01, + 0x71c43c4f5ba3c797, + 0xbc1ef4bd567ae7a9, + 0x3f624cb2d64498bd, + 0xe41064d22c1f4ec8, + 0x2ef9c5a5ba384001, + 0xb6fd6df6fa9e74cd, + 0xf18278bce4af267a, + 0x8255b3d0f1ef990e, + 0x5a758ca390c5f293, + // 2^200 * 2 * G + 0xa2b72710d9462495, + 0x3aa8c6d2d57d5003, + 0xe3d400bfa0b487ca, + 0x2dbae244b3eb72ec, + 0x8ce0918b1d61dc94, + 0x8ded36469a813066, + 0xd4e6a829afe8aad3, + 0x0a738027f639d43f, + 0x980f4a2f57ffe1cc, + 0x00670d0de1839843, + 0x105c3f4a49fb15fd, + 0x2698ca635126a69c, + // 2^200 * 3 * G + 0xe765318832b0ba78, + 0x381831f7925cff8b, + 0x08a81b91a0291fcc, + 0x1fb43dcc49caeb07, + 0x2e3d702f5e3dd90e, + 0x9e3f0918e4d25386, + 0x5e773ef6024da96a, + 0x3c004b0c4afa3332, + 0x9aa946ac06f4b82b, + 0x1ca284a5a806c4f3, + 0x3ed3265fc6cd4787, + 0x6b43fd01cd1fd217, + // 2^200 * 4 * G + 0xc7a75d4b4697c544, + 0x15fdf848df0fffbf, + 0x2868b9ebaa46785a, + 0x5a68d7105b52f714, + 0xb5c742583e760ef3, + 0x75dc52b9ee0ab990, + 0xbf1427c2072b923f, + 0x73420b2d6ff0d9f0, + 0xaf2cf6cb9e851e06, + 0x8f593913c62238c4, + 0xda8ab89699fbf373, + 0x3db5632fea34bc9e, + // 2^200 * 5 * G + 0xf46eee2bf75dd9d8, + 0x0d17b1f6396759a5, + 0x1bf2d131499e7273, + 0x04321adf49d75f13, + 0x2e4990b1829825d5, + 0xedeaeb873e9a8991, + 0xeef03d394c704af8, + 0x59197ea495df2b0e, + 0x04e16019e4e55aae, + 0xe77b437a7e2f92e9, + 0xc7ce2dc16f159aa4, + 0x45eafdc1f4d70cc0, + // 2^200 * 6 * G + 0x698401858045d72b, + 0x4c22faa2cf2f0651, + 
0x941a36656b222dc6, + 0x5a5eebc80362dade, + 0xb60e4624cfccb1ed, + 0x59dbc292bd5c0395, + 0x31a09d1ddc0481c9, + 0x3f73ceea5d56d940, + 0xb7a7bfd10a4e8dc6, + 0xbe57007e44c9b339, + 0x60c1207f1557aefa, + 0x26058891266218db, + // 2^200 * 7 * G + 0x59f704a68360ff04, + 0xc3d93fde7661e6f4, + 0x831b2a7312873551, + 0x54ad0c2e4e615d57, + 0x4c818e3cc676e542, + 0x5e422c9303ceccad, + 0xec07cccab4129f08, + 0x0dedfa10b24443b8, + 0xee3b67d5b82b522a, + 0x36f163469fa5c1eb, + 0xa5b4d2f26ec19fd3, + 0x62ecb2baa77a9408, + // 2^200 * 8 * G + 0xe5ed795261152b3d, + 0x4962357d0eddd7d1, + 0x7482c8d0b96b4c71, + 0x2e59f919a966d8be, + 0x92072836afb62874, + 0x5fcd5e8579e104a5, + 0x5aad01adc630a14a, + 0x61913d5075663f98, + 0x0dc62d361a3231da, + 0xfa47583294200270, + 0x02d801513f9594ce, + 0x3ddbc2a131c05d5c, + // 2^204 * 1 * G + 0x3f50a50a4ffb81ef, + 0xb1e035093bf420bf, + 0x9baa8e1cc6aa2cd0, + 0x32239861fa237a40, + 0xfb735ac2004a35d1, + 0x31de0f433a6607c3, + 0x7b8591bfc528d599, + 0x55be9a25f5bb050c, + 0x0d005acd33db3dbf, + 0x0111b37c80ac35e2, + 0x4892d66c6f88ebeb, + 0x770eadb16508fbcd, + // 2^204 * 2 * G + 0x8451f9e05e4e89dd, + 0xc06302ffbc793937, + 0x5d22749556a6495c, + 0x09a6755ca05603fb, + 0xf1d3b681a05071b9, + 0x2207659a3592ff3a, + 0x5f0169297881e40e, + 0x16bedd0e86ba374e, + 0x5ecccc4f2c2737b5, + 0x43b79e0c2dccb703, + 0x33e008bc4ec43df3, + 0x06c1b840f07566c0, + // 2^204 * 3 * G + 0x7688a5c6a388f877, + 0x02a96c14deb2b6ac, + 0x64c9f3431b8c2af8, + 0x3628435554a1eed6, + 0x69ee9e7f9b02805c, + 0xcbff828a547d1640, + 0x3d93a869b2430968, + 0x46b7b8cd3fe26972, + 0xe9812086fe7eebe0, + 0x4cba6be72f515437, + 0x1d04168b516efae9, + 0x5ea1391043982cb9, + // 2^204 * 4 * G + 0x49125c9cf4702ee1, + 0x4520b71f8b25b32d, + 0x33193026501fef7e, + 0x656d8997c8d2eb2b, + 0x6f2b3be4d5d3b002, + 0xafec33d96a09c880, + 0x035f73a4a8bcc4cc, + 0x22c5b9284662198b, + 0xcb58c8fe433d8939, + 0x89a0cb2e6a8d7e50, + 0x79ca955309fbbe5a, + 0x0c626616cd7fc106, + // 2^204 * 5 * G + 0x1ffeb80a4879b61f, + 0x6396726e4ada21ed, + 
0x33c7b093368025ba, + 0x471aa0c6f3c31788, + 0x8fdfc379fbf454b1, + 0x45a5a970f1a4b771, + 0xac921ef7bad35915, + 0x42d088dca81c2192, + 0x8fda0f37a0165199, + 0x0adadb77c8a0e343, + 0x20fbfdfcc875e820, + 0x1cf2bea80c2206e7, + // 2^204 * 6 * G + 0xc2ddf1deb36202ac, + 0x92a5fe09d2e27aa5, + 0x7d1648f6fc09f1d3, + 0x74c2cc0513bc4959, + 0x982d6e1a02c0412f, + 0x90fa4c83db58e8fe, + 0x01c2f5bcdcb18bc0, + 0x686e0c90216abc66, + 0x1fadbadba54395a7, + 0xb41a02a0ae0da66a, + 0xbf19f598bba37c07, + 0x6a12b8acde48430d, + // 2^204 * 7 * G + 0xf8daea1f39d495d9, + 0x592c190e525f1dfc, + 0xdb8cbd04c9991d1b, + 0x11f7fda3d88f0cb7, + 0x793bdd801aaeeb5f, + 0x00a2a0aac1518871, + 0xe8a373a31f2136b4, + 0x48aab888fc91ef19, + 0x041f7e925830f40e, + 0x002d6ca979661c06, + 0x86dc9ff92b046a2e, + 0x760360928b0493d1, + // 2^204 * 8 * G + 0x21bb41c6120cf9c6, + 0xeab2aa12decda59b, + 0xc1a72d020aa48b34, + 0x215d4d27e87d3b68, + 0xb43108e5695a0b05, + 0x6cb00ee8ad37a38b, + 0x5edad6eea3537381, + 0x3f2602d4b6dc3224, + 0xc8b247b65bcaf19c, + 0x49779dc3b1b2c652, + 0x89a180bbd5ece2e2, + 0x13f098a3cec8e039, + // 2^208 * 1 * G + 0x9adc0ff9ce5ec54b, + 0x039c2a6b8c2f130d, + 0x028007c7f0f89515, + 0x78968314ac04b36b, + 0xf3aa57a22796bb14, + 0x883abab79b07da21, + 0xe54be21831a0391c, + 0x5ee7fb38d83205f9, + 0x538dfdcb41446a8e, + 0xa5acfda9434937f9, + 0x46af908d263c8c78, + 0x61d0633c9bca0d09, + // 2^208 * 2 * G + 0x63744935ffdb2566, + 0xc5bd6b89780b68bb, + 0x6f1b3280553eec03, + 0x6e965fd847aed7f5, + 0xada328bcf8fc73df, + 0xee84695da6f037fc, + 0x637fb4db38c2a909, + 0x5b23ac2df8067bdc, + 0x9ad2b953ee80527b, + 0xe88f19aafade6d8d, + 0x0e711704150e82cf, + 0x79b9bbb9dd95dedc, + // 2^208 * 3 * G + 0xebb355406a3126c2, + 0xd26383a868c8c393, + 0x6c0c6429e5b97a82, + 0x5065f158c9fd2147, + 0xd1997dae8e9f7374, + 0xa032a2f8cfbb0816, + 0xcd6cba126d445f0a, + 0x1ba811460accb834, + 0x708169fb0c429954, + 0xe14600acd76ecf67, + 0x2eaab98a70e645ba, + 0x3981f39e58a4faf2, + // 2^208 * 4 * G + 0x18fb8a7559230a93, + 0x1d168f6960e6f45d, + 
0x3a85a94514a93cb5, + 0x38dc083705acd0fd, + 0xc845dfa56de66fde, + 0xe152a5002c40483a, + 0xe9d2e163c7b4f632, + 0x30f4452edcbc1b65, + 0x856d2782c5759740, + 0xfa134569f99cbecc, + 0x8844fc73c0ea4e71, + 0x632d9a1a593f2469, + // 2^208 * 5 * G + 0xf6bb6b15b807cba6, + 0x1823c7dfbc54f0d7, + 0xbb1d97036e29670b, + 0x0b24f48847ed4a57, + 0xbf09fd11ed0c84a7, + 0x63f071810d9f693a, + 0x21908c2d57cf8779, + 0x3a5a7df28af64ba2, + 0xdcdad4be511beac7, + 0xa4538075ed26ccf2, + 0xe19cff9f005f9a65, + 0x34fcf74475481f63, + // 2^208 * 6 * G + 0xc197e04c789767ca, + 0xb8714dcb38d9467d, + 0x55de888283f95fa8, + 0x3d3bdc164dfa63f7, + 0xa5bb1dab78cfaa98, + 0x5ceda267190b72f2, + 0x9309c9110a92608e, + 0x0119a3042fb374b0, + 0x67a2d89ce8c2177d, + 0x669da5f66895d0c1, + 0xf56598e5b282a2b0, + 0x56c088f1ede20a73, + // 2^208 * 7 * G + 0x336d3d1110a86e17, + 0xd7f388320b75b2fa, + 0xf915337625072988, + 0x09674c6b99108b87, + 0x581b5fac24f38f02, + 0xa90be9febae30cbd, + 0x9a2169028acf92f0, + 0x038b7ea48359038f, + 0x9f4ef82199316ff8, + 0x2f49d282eaa78d4f, + 0x0971a5ab5aef3174, + 0x6e5e31025969eb65, + // 2^208 * 8 * G + 0xb16c62f587e593fb, + 0x4999eddeca5d3e71, + 0xb491c1e014cc3e6d, + 0x08f5114789a8dba8, + 0x3304fb0e63066222, + 0xfb35068987acba3f, + 0xbd1924778c1061a3, + 0x3058ad43d1838620, + 0x323c0ffde57663d0, + 0x05c3df38a22ea610, + 0xbdc78abdac994f9a, + 0x26549fa4efe3dc99, + // 2^212 * 1 * G + 0x738b38d787ce8f89, + 0xb62658e24179a88d, + 0x30738c9cf151316d, + 0x49128c7f727275c9, + 0x04dbbc17f75396b9, + 0x69e6a2d7d2f86746, + 0xc6409d99f53eabc6, + 0x606175f6332e25d2, + 0x4021370ef540e7dd, + 0x0910d6f5a1f1d0a5, + 0x4634aacd5b06b807, + 0x6a39e6356944f235, + // 2^212 * 2 * G + 0x96cd5640df90f3e7, + 0x6c3a760edbfa25ea, + 0x24f3ef0959e33cc4, + 0x42889e7e530d2e58, + 0x1da1965774049e9d, + 0xfbcd6ea198fe352b, + 0xb1cbcd50cc5236a6, + 0x1f5ec83d3f9846e2, + 0x8efb23c3328ccb75, + 0xaf42a207dd876ee9, + 0x20fbdadc5dfae796, + 0x241e246b06bf9f51, + // 2^212 * 3 * G + 0x29e68e57ad6e98f6, + 0x4c9260c80b462065, + 
0x3f00862ea51ebb4b, + 0x5bc2c77fb38d9097, + 0x7eaafc9a6280bbb8, + 0x22a70f12f403d809, + 0x31ce40bb1bfc8d20, + 0x2bc65635e8bd53ee, + 0xe8d5dc9fa96bad93, + 0xe58fb17dde1947dc, + 0x681532ea65185fa3, + 0x1fdd6c3b034a7830, + // 2^212 * 4 * G + 0x0a64e28c55dc18fe, + 0xe3df9e993399ebdd, + 0x79ac432370e2e652, + 0x35ff7fc33ae4cc0e, + 0x9c13a6a52dd8f7a9, + 0x2dbb1f8c3efdcabf, + 0x961e32405e08f7b5, + 0x48c8a121bbe6c9e5, + 0xfc415a7c59646445, + 0xd224b2d7c128b615, + 0x6035c9c905fbb912, + 0x42d7a91274429fab, + // 2^212 * 5 * G + 0x4e6213e3eaf72ed3, + 0x6794981a43acd4e7, + 0xff547cde6eb508cb, + 0x6fed19dd10fcb532, + 0xa9a48947933da5bc, + 0x4a58920ec2e979ec, + 0x96d8800013e5ac4c, + 0x453692d74b48b147, + 0xdd775d99a8559c6f, + 0xf42a2140df003e24, + 0x5223e229da928a66, + 0x063f46ba6d38f22c, + // 2^212 * 6 * G + 0xd2d242895f536694, + 0xca33a2c542939b2c, + 0x986fada6c7ddb95c, + 0x5a152c042f712d5d, + 0x39843cb737346921, + 0xa747fb0738c89447, + 0xcb8d8031a245307e, + 0x67810f8e6d82f068, + 0x3eeb8fbcd2287db4, + 0x72c7d3a301a03e93, + 0x5473e88cbd98265a, + 0x7324aa515921b403, + // 2^212 * 7 * G + 0x857942f46c3cbe8e, + 0xa1d364b14730c046, + 0x1c8ed914d23c41bf, + 0x0838e161eef6d5d2, + 0xad23f6dae82354cb, + 0x6962502ab6571a6d, + 0x9b651636e38e37d1, + 0x5cac5005d1a3312f, + 0x8cc154cce9e39904, + 0x5b3a040b84de6846, + 0xc4d8a61cb1be5d6e, + 0x40fb897bd8861f02, + // 2^212 * 8 * G + 0x84c5aa9062de37a1, + 0x421da5000d1d96e1, + 0x788286306a9242d9, + 0x3c5e464a690d10da, + 0xe57ed8475ab10761, + 0x71435e206fd13746, + 0x342f824ecd025632, + 0x4b16281ea8791e7b, + 0xd1c101d50b813381, + 0xdee60f1176ee6828, + 0x0cb68893383f6409, + 0x6183c565f6ff484a, + // 2^216 * 1 * G + 0x741d5a461e6bf9d6, + 0x2305b3fc7777a581, + 0xd45574a26474d3d9, + 0x1926e1dc6401e0ff, + 0xdb468549af3f666e, + 0xd77fcf04f14a0ea5, + 0x3df23ff7a4ba0c47, + 0x3a10dfe132ce3c85, + 0xe07f4e8aea17cea0, + 0x2fd515463a1fc1fd, + 0x175322fd31f2c0f1, + 0x1fa1d01d861e5d15, + // 2^216 * 2 * G + 0xcc8055947d599832, + 0x1e4656da37f15520, + 
0x99f6f7744e059320, + 0x773563bc6a75cf33, + 0x38dcac00d1df94ab, + 0x2e712bddd1080de9, + 0x7f13e93efdd5e262, + 0x73fced18ee9a01e5, + 0x06b1e90863139cb3, + 0xa493da67c5a03ecd, + 0x8d77cec8ad638932, + 0x1f426b701b864f44, + // 2^216 * 3 * G + 0xefc9264c41911c01, + 0xf1a3b7b817a22c25, + 0x5875da6bf30f1447, + 0x4e1af5271d31b090, + 0xf17e35c891a12552, + 0xb76b8153575e9c76, + 0xfa83406f0d9b723e, + 0x0b76bb1b3fa7e438, + 0x08b8c1f97f92939b, + 0xbe6771cbd444ab6e, + 0x22e5646399bb8017, + 0x7b6dd61eb772a955, + // 2^216 * 4 * G + 0xb7adc1e850f33d92, + 0x7998fa4f608cd5cf, + 0xad962dbd8dfc5bdb, + 0x703e9bceaf1d2f4f, + 0x5730abf9ab01d2c7, + 0x16fb76dc40143b18, + 0x866cbe65a0cbb281, + 0x53fa9b659bff6afe, + 0x6c14c8e994885455, + 0x843a5d6665aed4e5, + 0x181bb73ebcd65af1, + 0x398d93e5c4c61f50, + // 2^216 * 5 * G + 0x1c4bd16733e248f3, + 0xbd9e128715bf0a5f, + 0xd43f8cf0a10b0376, + 0x53b09b5ddf191b13, + 0xc3877c60d2e7e3f2, + 0x3b34aaa030828bb1, + 0x283e26e7739ef138, + 0x699c9c9002c30577, + 0xf306a7235946f1cc, + 0x921718b5cce5d97d, + 0x28cdd24781b4e975, + 0x51caf30c6fcdd907, + // 2^216 * 6 * G + 0xa60ba7427674e00a, + 0x630e8570a17a7bf3, + 0x3758563dcf3324cc, + 0x5504aa292383fdaa, + 0x737af99a18ac54c7, + 0x903378dcc51cb30f, + 0x2b89bc334ce10cc7, + 0x12ae29c189f8e99a, + 0xa99ec0cb1f0d01cf, + 0x0dd1efcc3a34f7ae, + 0x55ca7521d09c4e22, + 0x5fd14fe958eba5ea, + // 2^216 * 7 * G + 0xb5dc2ddf2845ab2c, + 0x069491b10a7fe993, + 0x4daaf3d64002e346, + 0x093ff26e586474d1, + 0x3c42fe5ebf93cb8e, + 0xbedfa85136d4565f, + 0xe0f0859e884220e8, + 0x7dd73f960725d128, + 0xb10d24fe68059829, + 0x75730672dbaf23e5, + 0x1367253ab457ac29, + 0x2f59bcbc86b470a4, + // 2^216 * 8 * G + 0x83847d429917135f, + 0xad1b911f567d03d7, + 0x7e7748d9be77aad1, + 0x5458b42e2e51af4a, + 0x7041d560b691c301, + 0x85201b3fadd7e71e, + 0x16c2e16311335585, + 0x2aa55e3d010828b1, + 0xed5192e60c07444f, + 0x42c54e2d74421d10, + 0x352b4c82fdb5c864, + 0x13e9004a8a768664, + // 2^220 * 1 * G + 0xcbb5b5556c032bff, + 0xdf7191b729297a3a, + 
0xc1ff7326aded81bb, + 0x71ade8bb68be03f5, + 0x1e6284c5806b467c, + 0xc5f6997be75d607b, + 0x8b67d958b378d262, + 0x3d88d66a81cd8b70, + 0x8b767a93204ed789, + 0x762fcacb9fa0ae2a, + 0x771febcc6dce4887, + 0x343062158ff05fb3, + // 2^220 * 2 * G + 0xe05da1a7e1f5bf49, + 0x26457d6dd4736092, + 0x77dcb07773cc32f6, + 0x0a5d94969cdd5fcd, + 0xfce219072a7b31b4, + 0x4d7adc75aa578016, + 0x0ec276a687479324, + 0x6d6d9d5d1fda4beb, + 0x22b1a58ae9b08183, + 0xfd95d071c15c388b, + 0xa9812376850a0517, + 0x33384cbabb7f335e, + // 2^220 * 3 * G + 0x3c6fa2680ca2c7b5, + 0x1b5082046fb64fda, + 0xeb53349c5431d6de, + 0x5278b38f6b879c89, + 0x33bc627a26218b8d, + 0xea80b21fc7a80c61, + 0x9458b12b173e9ee6, + 0x076247be0e2f3059, + 0x52e105f61416375a, + 0xec97af3685abeba4, + 0x26e6b50623a67c36, + 0x5cf0e856f3d4fb01, + // 2^220 * 4 * G + 0xf6c968731ae8cab4, + 0x5e20741ecb4f92c5, + 0x2da53be58ccdbc3e, + 0x2dddfea269970df7, + 0xbeaece313db342a8, + 0xcba3635b842db7ee, + 0xe88c6620817f13ef, + 0x1b9438aa4e76d5c6, + 0x8a50777e166f031a, + 0x067b39f10fb7a328, + 0x1925c9a6010fbd76, + 0x6df9b575cc740905, + // 2^220 * 5 * G + 0x42c1192927f6bdcf, + 0x8f91917a403d61ca, + 0xdc1c5a668b9e1f61, + 0x1596047804ec0f8d, + 0xecdfc35b48cade41, + 0x6a88471fb2328270, + 0x740a4a2440a01b6a, + 0x471e5796003b5f29, + 0xda96bbb3aced37ac, + 0x7a2423b5e9208cea, + 0x24cc5c3038aebae2, + 0x50c356afdc5dae2f, + // 2^220 * 6 * G + 0x09dcbf4341c30318, + 0xeeba061183181dce, + 0xc179c0cedc1e29a1, + 0x1dbf7b89073f35b0, + 0xcfed9cdf1b31b964, + 0xf486a9858ca51af3, + 0x14897265ea8c1f84, + 0x784a53dd932acc00, + 0x2d99f9df14fc4920, + 0x76ccb60cc4499fe5, + 0xa4132cbbe5cf0003, + 0x3f93d82354f000ea, + // 2^220 * 7 * G + 0x8183e7689e04ce85, + 0x678fb71e04465341, + 0xad92058f6688edac, + 0x5da350d3532b099a, + 0xeaac12d179e14978, + 0xff923ff3bbebff5e, + 0x4af663e40663ce27, + 0x0fd381a811a5f5ff, + 0xf256aceca436df54, + 0x108b6168ae69d6e8, + 0x20d986cb6b5d036c, + 0x655957b9fee2af50, + // 2^220 * 8 * G + 0xaea8b07fa902030f, + 0xf88c766af463d143, + 
0x15b083663c787a60, + 0x08eab1148267a4a8, + 0xbdc1409bd002d0ac, + 0x66660245b5ccd9a6, + 0x82317dc4fade85ec, + 0x02fe934b6ad7df0d, + 0xef5cf100cfb7ea74, + 0x22897633a1cb42ac, + 0xd4ce0c54cef285e2, + 0x30408c048a146a55, + // 2^224 * 1 * G + 0x739d8845832fcedb, + 0xfa38d6c9ae6bf863, + 0x32bc0dcab74ffef7, + 0x73937e8814bce45e, + 0xbb2e00c9193b877f, + 0xece3a890e0dc506b, + 0xecf3b7c036de649f, + 0x5f46040898de9e1a, + 0xb9037116297bf48d, + 0xa9d13b22d4f06834, + 0xe19715574696bdc6, + 0x2cf8a4e891d5e835, + // 2^224 * 2 * G + 0x6d93fd8707110f67, + 0xdd4c09d37c38b549, + 0x7cb16a4cc2736a86, + 0x2049bd6e58252a09, + 0x2cb5487e17d06ba2, + 0x24d2381c3950196b, + 0xd7659c8185978a30, + 0x7a6f7f2891d6a4f6, + 0x7d09fd8d6a9aef49, + 0xf0ee60be5b3db90b, + 0x4c21b52c519ebfd4, + 0x6011aadfc545941d, + // 2^224 * 3 * G + 0x5f67926dcf95f83c, + 0x7c7e856171289071, + 0xd6a1e7f3998f7a5b, + 0x6fc5cc1b0b62f9e0, + 0x63ded0c802cbf890, + 0xfbd098ca0dff6aaa, + 0x624d0afdb9b6ed99, + 0x69ce18b779340b1e, + 0xd1ef5528b29879cb, + 0xdd1aae3cd47e9092, + 0x127e0442189f2352, + 0x15596b3ae57101f1, + // 2^224 * 4 * G + 0x462739d23f9179a2, + 0xff83123197d6ddcf, + 0x1307deb553f2148a, + 0x0d2237687b5f4dda, + 0x09ff31167e5124ca, + 0x0be4158bd9c745df, + 0x292b7d227ef556e5, + 0x3aa4e241afb6d138, + 0x2cc138bf2a3305f5, + 0x48583f8fa2e926c3, + 0x083ab1a25549d2eb, + 0x32fcaa6e4687a36c, + // 2^224 * 5 * G + 0x7bc56e8dc57d9af5, + 0x3e0bd2ed9df0bdf2, + 0xaac014de22efe4a3, + 0x4627e9cefebd6a5c, + 0x3207a4732787ccdf, + 0x17e31908f213e3f8, + 0xd5b2ecd7f60d964e, + 0x746f6336c2600be9, + 0x3f4af345ab6c971c, + 0xe288eb729943731f, + 0x33596a8a0344186d, + 0x7b4917007ed66293, + // 2^224 * 6 * G + 0x2d85fb5cab84b064, + 0x497810d289f3bc14, + 0x476adc447b15ce0c, + 0x122ba376f844fd7b, + 0x54341b28dd53a2dd, + 0xaa17905bdf42fc3f, + 0x0ff592d94dd2f8f4, + 0x1d03620fe08cd37d, + 0xc20232cda2b4e554, + 0x9ed0fd42115d187f, + 0x2eabb4be7dd479d9, + 0x02c70bf52b68ec4c, + // 2^224 * 7 * G + 0xa287ec4b5d0b2fbb, + 0x415c5790074882ca, + 
0xe044a61ec1d0815c, + 0x26334f0a409ef5e0, + 0xace532bf458d72e1, + 0x5be768e07cb73cb5, + 0x56cf7d94ee8bbde7, + 0x6b0697e3feb43a03, + 0xb6c8f04adf62a3c0, + 0x3ef000ef076da45d, + 0x9c9cb95849f0d2a9, + 0x1cc37f43441b2fae, + // 2^224 * 8 * G + 0x508f565a5cc7324f, + 0xd061c4c0e506a922, + 0xfb18abdb5c45ac19, + 0x6c6809c10380314a, + 0xd76656f1c9ceaeb9, + 0x1c5b15f818e5656a, + 0x26e72832844c2334, + 0x3a346f772f196838, + 0xd2d55112e2da6ac8, + 0xe9bd0331b1e851ed, + 0x960746dd8ec67262, + 0x05911b9f6ef7c5d0, + // 2^228 * 1 * G + 0xe9dcd756b637ff2d, + 0xec4c348fc987f0c4, + 0xced59285f3fbc7b7, + 0x3305354793e1ea87, + 0x01c18980c5fe9f94, + 0xcd656769716fd5c8, + 0x816045c3d195a086, + 0x6e2b7f3266cc7982, + 0xcc802468f7c3568f, + 0x9de9ba8219974cb3, + 0xabb7229cb5b81360, + 0x44e2017a6fbeba62, + // 2^228 * 2 * G + 0xc4c2a74354dab774, + 0x8e5d4c3c4eaf031a, + 0xb76c23d242838f17, + 0x749a098f68dce4ea, + 0x87f82cf3b6ca6ecd, + 0x580f893e18f4a0c2, + 0x058930072604e557, + 0x6cab6ac256d19c1d, + 0xdcdfe0a02cc1de60, + 0x032665ff51c5575b, + 0x2c0c32f1073abeeb, + 0x6a882014cd7b8606, + // 2^228 * 3 * G + 0xa52a92fea4747fb5, + 0xdc12a4491fa5ab89, + 0xd82da94bb847a4ce, + 0x4d77edce9512cc4e, + 0xd111d17caf4feb6e, + 0x050bba42b33aa4a3, + 0x17514c3ceeb46c30, + 0x54bedb8b1bc27d75, + 0x77c8e14577e2189c, + 0xa3e46f6aff99c445, + 0x3144dfc86d335343, + 0x3a96559e7c4216a9, + // 2^228 * 4 * G + 0x12550d37f42ad2ee, + 0x8b78e00498a1fbf5, + 0x5d53078233894cb2, + 0x02c84e4e3e498d0c, + 0x4493896880baaa52, + 0x4c98afc4f285940e, + 0xef4aa79ba45448b6, + 0x5278c510a57aae7f, + 0xa54dd074294c0b94, + 0xf55d46b8df18ffb6, + 0xf06fecc58dae8366, + 0x588657668190d165, + // 2^228 * 5 * G + 0xd47712311aef7117, + 0x50343101229e92c7, + 0x7a95e1849d159b97, + 0x2449959b8b5d29c9, + 0xbf5834f03de25cc3, + 0xb887c8aed6815496, + 0x5105221a9481e892, + 0x6760ed19f7723f93, + 0x669ba3b7ac35e160, + 0x2eccf73fba842056, + 0x1aec1f17c0804f07, + 0x0d96bc031856f4e7, + // 2^228 * 6 * G + 0x3318be7775c52d82, + 0x4cb764b554d0aab9, + 
0xabcf3d27cc773d91, + 0x3bf4d1848123288a, + 0xb1d534b0cc7505e1, + 0x32cd003416c35288, + 0xcb36a5800762c29d, + 0x5bfe69b9237a0bf8, + 0x183eab7e78a151ab, + 0xbbe990c999093763, + 0xff717d6e4ac7e335, + 0x4c5cddb325f39f88, + // 2^228 * 7 * G + 0xc0f6b74d6190a6eb, + 0x20ea81a42db8f4e4, + 0xa8bd6f7d97315760, + 0x33b1d60262ac7c21, + 0x57750967e7a9f902, + 0x2c37fdfc4f5b467e, + 0xb261663a3177ba46, + 0x3a375e78dc2d532b, + 0x8141e72f2d4dddea, + 0xe6eafe9862c607c8, + 0x23c28458573cafd0, + 0x46b9476f4ff97346, + // 2^228 * 8 * G + 0x0c1ffea44f901e5c, + 0x2b0b6fb72184b782, + 0xe587ff910114db88, + 0x37130f364785a142, + 0x1215505c0d58359f, + 0x2a2013c7fc28c46b, + 0x24a0a1af89ea664e, + 0x4400b638a1130e1f, + 0x3a01b76496ed19c3, + 0x31e00ab0ed327230, + 0x520a885783ca15b1, + 0x06aab9875accbec7, + // 2^232 * 1 * G + 0xc1339983f5df0ebb, + 0xc0f3758f512c4cac, + 0x2cf1130a0bb398e1, + 0x6b3cecf9aa270c62, + 0x5349acf3512eeaef, + 0x20c141d31cc1cb49, + 0x24180c07a99a688d, + 0x555ef9d1c64b2d17, + 0x36a770ba3b73bd08, + 0x624aef08a3afbf0c, + 0x5737ff98b40946f2, + 0x675f4de13381749d, + // 2^232 * 2 * G + 0x0e2c52036b1782fc, + 0x64816c816cad83b4, + 0xd0dcbdd96964073e, + 0x13d99df70164c520, + 0xa12ff6d93bdab31d, + 0x0725d80f9d652dfe, + 0x019c4ff39abe9487, + 0x60f450b882cd3c43, + 0x014b5ec321e5c0ca, + 0x4fcb69c9d719bfa2, + 0x4e5f1c18750023a0, + 0x1c06de9e55edac80, + // 2^232 * 3 * G + 0x990f7ad6a33ec4e2, + 0x6608f938be2ee08e, + 0x9ca143c563284515, + 0x4cf38a1fec2db60d, + 0xffd52b40ff6d69aa, + 0x34530b18dc4049bb, + 0x5e4a5c2fa34d9897, + 0x78096f8e7d32ba2d, + 0xa0aaaa650dfa5ce7, + 0xf9c49e2a48b5478c, + 0x4f09cc7d7003725b, + 0x373cad3a26091abe, + // 2^232 * 4 * G + 0xb294634d82c9f57c, + 0x1fcbfde124934536, + 0x9e9c4db3418cdb5a, + 0x0040f3d9454419fc, + 0xf1bea8fb89ddbbad, + 0x3bcb2cbc61aeaecb, + 0x8f58a7bb1f9b8d9d, + 0x21547eda5112a686, + 0xdefde939fd5986d3, + 0xf4272c89510a380c, + 0xb72ba407bb3119b9, + 0x63550a334a254df4, + // 2^232 * 5 * G + 0x6507d6edb569cf37, + 0x178429b00ca52ee1, + 
0xea7c0090eb6bd65d, + 0x3eea62c7daf78f51, + 0x9bba584572547b49, + 0xf305c6fae2c408e0, + 0x60e8fa69c734f18d, + 0x39a92bafaa7d767a, + 0x9d24c713e693274e, + 0x5f63857768dbd375, + 0x70525560eb8ab39a, + 0x68436a0665c9c4cd, + // 2^232 * 6 * G + 0xbc0235e8202f3f27, + 0xc75c00e264f975b0, + 0x91a4e9d5a38c2416, + 0x17b6e7f68ab789f9, + 0x1e56d317e820107c, + 0xc5266844840ae965, + 0xc1e0a1c6320ffc7a, + 0x5373669c91611472, + 0x5d2814ab9a0e5257, + 0x908f2084c9cab3fc, + 0xafcaf5885b2d1eca, + 0x1cb4b5a678f87d11, + // 2^232 * 7 * G + 0xb664c06b394afc6c, + 0x0c88de2498da5fb1, + 0x4f8d03164bcad834, + 0x330bca78de7434a2, + 0x6b74aa62a2a007e7, + 0xf311e0b0f071c7b1, + 0x5707e438000be223, + 0x2dc0fd2d82ef6eac, + 0x982eff841119744e, + 0xf9695e962b074724, + 0xc58ac14fbfc953fb, + 0x3c31be1b369f1cf5, + // 2^232 * 8 * G + 0xb0f4864d08948aee, + 0x07dc19ee91ba1c6f, + 0x7975cdaea6aca158, + 0x330b61134262d4bb, + 0xc168bc93f9cb4272, + 0xaeb8711fc7cedb98, + 0x7f0e52aa34ac8d7a, + 0x41cec1097e7d55bb, + 0xf79619d7a26d808a, + 0xbb1fd49e1d9e156d, + 0x73d7c36cdba1df27, + 0x26b44cd91f28777d, + // 2^236 * 1 * G + 0x300a9035393aa6d8, + 0x2b501131a12bb1cd, + 0x7b1ff677f093c222, + 0x4309c1f8cab82bad, + 0xaf44842db0285f37, + 0x8753189047efc8df, + 0x9574e091f820979a, + 0x0e378d6069615579, + 0xd9fa917183075a55, + 0x4bdb5ad26b009fdc, + 0x7829ad2cd63def0e, + 0x078fc54975fd3877, + // 2^236 * 2 * G + 0x87dfbd1428878f2d, + 0x134636dd1e9421a1, + 0x4f17c951257341a3, + 0x5df98d4bad296cb8, + 0xe2004b5bb833a98a, + 0x44775dec2d4c3330, + 0x3aa244067eace913, + 0x272630e3d58e00a9, + 0xf3678fd0ecc90b54, + 0xf001459b12043599, + 0x26725fbc3758b89b, + 0x4325e4aa73a719ae, + // 2^236 * 3 * G + 0x657dc6ef433c3493, + 0x65375e9f80dbf8c3, + 0x47fd2d465b372dae, + 0x4966ab79796e7947, + 0xed24629acf69f59d, + 0x2a4a1ccedd5abbf4, + 0x3535ca1f56b2d67b, + 0x5d8c68d043b1b42d, + 0xee332d4de3b42b0a, + 0xd84e5a2b16a4601c, + 0x78243877078ba3e4, + 0x77ed1eb4184ee437, + // 2^236 * 4 * G + 0xbfd4e13f201839a0, + 0xaeefffe23e3df161, + 
0xb65b04f06b5d1fe3, + 0x52e085fb2b62fbc0, + 0x185d43f89e92ed1a, + 0xb04a1eeafe4719c6, + 0x499fbe88a6f03f4f, + 0x5d8b0d2f3c859bdd, + 0x124079eaa54cf2ba, + 0xd72465eb001b26e7, + 0x6843bcfdc97af7fd, + 0x0524b42b55eacd02, + // 2^236 * 5 * G + 0xfd0d5dbee45447b0, + 0x6cec351a092005ee, + 0x99a47844567579cb, + 0x59d242a216e7fa45, + 0xbc18dcad9b829eac, + 0x23ae7d28b5f579d0, + 0xc346122a69384233, + 0x1a6110b2e7d4ac89, + 0x4f833f6ae66997ac, + 0x6849762a361839a4, + 0x6985dec1970ab525, + 0x53045e89dcb1f546, + // 2^236 * 6 * G + 0xcb8bb346d75353db, + 0xfcfcb24bae511e22, + 0xcba48d40d50ae6ef, + 0x26e3bae5f4f7cb5d, + 0x84da3cde8d45fe12, + 0xbd42c218e444e2d2, + 0xa85196781f7e3598, + 0x7642c93f5616e2b2, + 0x2323daa74595f8e4, + 0xde688c8b857abeb4, + 0x3fc48e961c59326e, + 0x0b2e73ca15c9b8ba, + // 2^236 * 7 * G + 0xd6bb4428c17f5026, + 0x9eb27223fb5a9ca7, + 0xe37ba5031919c644, + 0x21ce380db59a6602, + 0x0e3fbfaf79c03a55, + 0x3077af054cbb5acf, + 0xd5c55245db3de39f, + 0x015e68c1476a4af7, + 0xc1d5285220066a38, + 0x95603e523570aef3, + 0x832659a7226b8a4d, + 0x5dd689091f8eedc9, + // 2^236 * 8 * G + 0xcbac84debfd3c856, + 0x1624c348b35ff244, + 0xb7f88dca5d9cad07, + 0x3b0e574da2c2ebe8, + 0x1d022591a5313084, + 0xca2d4aaed6270872, + 0x86a12b852f0bfd20, + 0x56e6c439ad7da748, + 0xc704ff4942bdbae6, + 0x5e21ade2b2de1f79, + 0xe95db3f35652fad8, + 0x0822b5378f08ebc1, + // 2^240 * 1 * G + 0x51f048478f387475, + 0xb25dbcf49cbecb3c, + 0x9aab1244d99f2055, + 0x2c709e6c1c10a5d6, + 0xe1b7f29362730383, + 0x4b5279ffebca8a2c, + 0xdafc778abfd41314, + 0x7deb10149c72610f, + 0xcb62af6a8766ee7a, + 0x66cbec045553cd0e, + 0x588001380f0be4b5, + 0x08e68e9ff62ce2ea, + // 2^240 * 2 * G + 0x34ad500a4bc130ad, + 0x8d38db493d0bd49c, + 0xa25c3d98500a89be, + 0x2f1f3f87eeba3b09, + 0x2f2d09d50ab8f2f9, + 0xacb9218dc55923df, + 0x4a8f342673766cb9, + 0x4cb13bd738f719f5, + 0xf7848c75e515b64a, + 0xa59501badb4a9038, + 0xc20d313f3f751b50, + 0x19a1e353c0ae2ee8, + // 2^240 * 3 * G + 0x7d1c7560bafa05c3, + 0xb3e1a0a0c6e55e61, + 
0xe3529718c0d66473, + 0x41546b11c20c3486, + 0xb42172cdd596bdbd, + 0x93e0454398eefc40, + 0x9fb15347b44109b5, + 0x736bd3990266ae34, + 0x85532d509334b3b4, + 0x46fd114b60816573, + 0xcc5f5f30425c8375, + 0x412295a2b87fab5c, + // 2^240 * 4 * G + 0x19c99b88f57ed6e9, + 0x5393cb266df8c825, + 0x5cee3213b30ad273, + 0x14e153ebb52d2e34, + 0x2e655261e293eac6, + 0x845a92032133acdb, + 0x460975cb7900996b, + 0x0760bb8d195add80, + 0x413e1a17cde6818a, + 0x57156da9ed69a084, + 0x2cbf268f46caccb1, + 0x6b34be9bc33ac5f2, + // 2^240 * 5 * G + 0xf3df2f643a78c0b2, + 0x4c3e971ef22e027c, + 0xec7d1c5e49c1b5a3, + 0x2012c18f0922dd2d, + 0x11fc69656571f2d3, + 0xc6c9e845530e737a, + 0xe33ae7a2d4fe5035, + 0x01b9c7b62e6dd30b, + 0x880b55e55ac89d29, + 0x1483241f45a0a763, + 0x3d36efdfc2e76c1f, + 0x08af5b784e4bade8, + // 2^240 * 6 * G + 0x283499dc881f2533, + 0x9d0525da779323b6, + 0x897addfb673441f4, + 0x32b79d71163a168d, + 0xe27314d289cc2c4b, + 0x4be4bd11a287178d, + 0x18d528d6fa3364ce, + 0x6423c1d5afd9826e, + 0xcc85f8d9edfcb36a, + 0x22bcc28f3746e5f9, + 0xe49de338f9e5d3cd, + 0x480a5efbc13e2dcc, + // 2^240 * 7 * G + 0x0b51e70b01622071, + 0x06b505cf8b1dafc5, + 0x2c6bb061ef5aabcd, + 0x47aa27600cb7bf31, + 0xb6614ce442ce221f, + 0x6e199dcc4c053928, + 0x663fb4a4dc1cbe03, + 0x24b31d47691c8e06, + 0x2a541eedc015f8c3, + 0x11a4fe7e7c693f7c, + 0xf0af66134ea278d6, + 0x545b585d14dda094, + // 2^240 * 8 * G + 0x67bf275ea0d43a0f, + 0xade68e34089beebe, + 0x4289134cd479e72e, + 0x0f62f9c332ba5454, + 0x6204e4d0e3b321e1, + 0x3baa637a28ff1e95, + 0x0b0ccffd5b99bd9e, + 0x4d22dc3e64c8d071, + 0xfcb46589d63b5f39, + 0x5cae6a3f57cbcf61, + 0xfebac2d2953afa05, + 0x1c0fa01a36371436, + // 2^244 * 1 * G + 0xe7547449bc7cd692, + 0x0f9abeaae6f73ddf, + 0x4af01ca700837e29, + 0x63ab1b5d3f1bc183, + 0xc11ee5e854c53fae, + 0x6a0b06c12b4f3ff4, + 0x33540f80e0b67a72, + 0x15f18fc3cd07e3ef, + 0x32750763b028f48c, + 0x06020740556a065f, + 0xd53bd812c3495b58, + 0x08706c9b865f508d, + // 2^244 * 2 * G + 0xf37ca2ab3d343dff, + 0x1a8c6a2d80abc617, + 
0x8e49e035d4ccffca, + 0x48b46beebaa1d1b9, + 0xcc991b4138b41246, + 0x243b9c526f9ac26b, + 0xb9ef494db7cbabbd, + 0x5fba433dd082ed00, + 0x9c49e355c9941ad0, + 0xb9734ade74498f84, + 0x41c3fed066663e5c, + 0x0ecfedf8e8e710b3, + // 2^244 * 3 * G + 0x76430f9f9cd470d9, + 0xb62acc9ba42f6008, + 0x1898297c59adad5e, + 0x7789dd2db78c5080, + 0x744f7463e9403762, + 0xf79a8dee8dfcc9c9, + 0x163a649655e4cde3, + 0x3b61788db284f435, + 0xb22228190d6ef6b2, + 0xa94a66b246ce4bfa, + 0x46c1a77a4f0b6cc7, + 0x4236ccffeb7338cf, + // 2^244 * 4 * G + 0x8497404d0d55e274, + 0x6c6663d9c4ad2b53, + 0xec2fb0d9ada95734, + 0x2617e120cdb8f73c, + 0x3bd82dbfda777df6, + 0x71b177cc0b98369e, + 0x1d0e8463850c3699, + 0x5a71945b48e2d1f1, + 0x6f203dd5405b4b42, + 0x327ec60410b24509, + 0x9c347230ac2a8846, + 0x77de29fc11ffeb6a, + // 2^244 * 5 * G + 0xb0ac57c983b778a8, + 0x53cdcca9d7fe912c, + 0x61c2b854ff1f59dc, + 0x3a1a2cf0f0de7dac, + 0x835e138fecced2ca, + 0x8c9eaf13ea963b9a, + 0xc95fbfc0b2160ea6, + 0x575e66f3ad877892, + 0x99803a27c88fcb3a, + 0x345a6789275ec0b0, + 0x459789d0ff6c2be5, + 0x62f882651e70a8b2, + // 2^244 * 6 * G + 0x085ae2c759ff1be4, + 0x149145c93b0e40b7, + 0xc467e7fa7ff27379, + 0x4eeecf0ad5c73a95, + 0x6d822986698a19e0, + 0xdc9821e174d78a71, + 0x41a85f31f6cb1f47, + 0x352721c2bcda9c51, + 0x48329952213fc985, + 0x1087cf0d368a1746, + 0x8e5261b166c15aa5, + 0x2d5b2d842ed24c21, + // 2^244 * 7 * G + 0x02cfebd9ebd3ded1, + 0xd45b217739021974, + 0x7576f813fe30a1b7, + 0x5691b6f9a34ef6c2, + 0x5eb7d13d196ac533, + 0x377234ecdb80be2b, + 0xe144cffc7cf5ae24, + 0x5226bcf9c441acec, + 0x79ee6c7223e5b547, + 0x6f5f50768330d679, + 0xed73e1e96d8adce9, + 0x27c3da1e1d8ccc03, + // 2^244 * 8 * G + 0x7eb9efb23fe24c74, + 0x3e50f49f1651be01, + 0x3ea732dc21858dea, + 0x17377bd75bb810f9, + 0x28302e71630ef9f6, + 0xc2d4a2032b64cee0, + 0x090820304b6292be, + 0x5fca747aa82adf18, + 0x232a03c35c258ea5, + 0x86f23a2c6bcb0cf1, + 0x3dad8d0d2e442166, + 0x04a8933cab76862b, + // 2^248 * 1 * G + 0xd2c604b622943dff, + 0xbc8cbece44cfb3a0, + 
0x5d254ff397808678, + 0x0fa3614f3b1ca6bf, + 0x69082b0e8c936a50, + 0xf9c9a035c1dac5b6, + 0x6fb73e54c4dfb634, + 0x4005419b1d2bc140, + 0xa003febdb9be82f0, + 0x2089c1af3a44ac90, + 0xf8499f911954fa8e, + 0x1fba218aef40ab42, + // 2^248 * 2 * G + 0xab549448fac8f53e, + 0x81f6e89a7ba63741, + 0x74fd6c7d6c2b5e01, + 0x392e3acaa8c86e42, + 0x4f3e57043e7b0194, + 0xa81d3eee08daaf7f, + 0xc839c6ab99dcdef1, + 0x6c535d13ff7761d5, + 0x4cbd34e93e8a35af, + 0x2e0781445887e816, + 0x19319c76f29ab0ab, + 0x25e17fe4d50ac13b, + // 2^248 * 3 * G + 0x0a289bd71e04f676, + 0x208e1c52d6420f95, + 0x5186d8b034691fab, + 0x255751442a9fb351, + 0x915f7ff576f121a7, + 0xc34a32272fcd87e3, + 0xccba2fde4d1be526, + 0x6bba828f8969899b, + 0xe2d1bc6690fe3901, + 0x4cb54a18a0997ad5, + 0x971d6914af8460d4, + 0x559d504f7f6b7be4, + // 2^248 * 4 * G + 0xa7738378b3eb54d5, + 0x1d69d366a5553c7c, + 0x0a26cf62f92800ba, + 0x01ab12d5807e3217, + 0x9c4891e7f6d266fd, + 0x0744a19b0307781b, + 0x88388f1d6061e23b, + 0x123ea6a3354bd50e, + 0x118d189041e32d96, + 0xb9ede3c2d8315848, + 0x1eab4271d83245d9, + 0x4a3961e2c918a154, + // 2^248 * 5 * G + 0x71dc3be0f8e6bba0, + 0xd6cef8347effe30a, + 0xa992425fe13a476a, + 0x2cd6bce3fb1db763, + 0x0327d644f3233f1e, + 0x499a260e34fcf016, + 0x83b5a716f2dab979, + 0x68aceead9bd4111f, + 0x38b4c90ef3d7c210, + 0x308e6e24b7ad040c, + 0x3860d9f1b7e73e23, + 0x595760d5b508f597, + // 2^248 * 6 * G + 0x6129bfe104aa6397, + 0x8f960008a4a7fccb, + 0x3f8bc0897d909458, + 0x709fa43edcb291a9, + 0x882acbebfd022790, + 0x89af3305c4115760, + 0x65f492e37d3473f4, + 0x2cb2c5df54515a2b, + 0xeb0a5d8c63fd2aca, + 0xd22bc1662e694eff, + 0x2723f36ef8cbb03a, + 0x70f029ecf0c8131f, + // 2^248 * 7 * G + 0x461307b32eed3e33, + 0xae042f33a45581e7, + 0xc94449d3195f0366, + 0x0b7d5d8a6c314858, + 0x2a6aafaa5e10b0b9, + 0x78f0a370ef041aa9, + 0x773efb77aa3ad61f, + 0x44eca5a2a74bd9e1, + 0x25d448327b95d543, + 0x70d38300a3340f1d, + 0xde1c531c60e1c52b, + 0x272224512c7de9e4, + // 2^248 * 8 * G + 0x1abc92af49c5342e, + 0xffeed811b2e6fad0, + 
0xefa28c8dfcc84e29, + 0x11b5df18a44cc543, + 0xbf7bbb8a42a975fc, + 0x8c5c397796ada358, + 0xe27fc76fcdedaa48, + 0x19735fd7f6bc20a6, + 0xe3ab90d042c84266, + 0xeb848e0f7f19547e, + 0x2503a1d065a497b9, + 0x0fef911191df895f, +]; diff --git a/graviola/src/low/x86_64/edwards25519_scalarmuldouble.rs b/graviola/src/low/x86_64/edwards25519_scalarmuldouble.rs new file mode 100644 index 000000000..7d9980db6 --- /dev/null +++ b/graviola/src/low/x86_64/edwards25519_scalarmuldouble.rs @@ -0,0 +1,3665 @@ +// generated source. do not edit. +#![allow(non_upper_case_globals, unused_macros, unused_imports)] +use crate::low::macros::*; + +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Double scalar multiplication for edwards25519, fresh and base point +// Input scalar[4], point[8], bscalar[4]; output res[8] +// +// extern void edwards25519_scalarmuldouble +// (uint64_t res[static 8],const uint64_t scalar[static 4], +// const uint64_t point[static 8],const uint64_t bscalar[static 4]); +// +// Given scalar = n, point = P and bscalar = m, returns in res +// the point (X,Y) = n * P + m * B where B = (...,4/5) is +// the standard basepoint for the edwards25519 (Ed25519) curve. +// +// Both 256-bit coordinates of the input point P are implicitly +// reduced modulo 2^255-19 if they are not already in reduced form, +// but the conventional usage is that they *are* already reduced. +// The scalars can be arbitrary 256-bit numbers but may also be +// considered as implicitly reduced modulo the group order. +// +// Standard x86-64 ABI: RDI = res, RSI = scalar, RDX = point, RCX = bscalar +// Microsoft x64 ABI: RCX = res, RDX = scalar, R8 = point, R9 = bscalar +// ---------------------------------------------------------------------------- + +// Size of individual field elements + +macro_rules! 
NUMSIZE { + () => { + "32" + }; +} + +// Pointer-offset pairs for result and temporaries on stack with some aliasing. +// Both "resx" and "resy" assume the "res" pointer has been preloaded into rbp. + +macro_rules! resx { () => { Q!("rbp + (0 * " NUMSIZE!() ")") } } +macro_rules! resy { () => { Q!("rbp + (1 * " NUMSIZE!() ")") } } + +macro_rules! scalar { () => { Q!("rsp + (0 * " NUMSIZE!() ")") } } +macro_rules! bscalar { () => { Q!("rsp + (1 * " NUMSIZE!() ")") } } + +macro_rules! tabent { () => { Q!("rsp + (2 * " NUMSIZE!() ")") } } +macro_rules! btabent { () => { Q!("rsp + (6 * " NUMSIZE!() ")") } } + +macro_rules! acc { () => { Q!("rsp + (9 * " NUMSIZE!() ")") } } + +macro_rules! tab { () => { Q!("rsp + (13 * " NUMSIZE!() ")") } } + +// Additional variables kept on the stack + +macro_rules! bf { () => { Q!("QWORD PTR [rsp + 45 * " NUMSIZE!() "]") } } +macro_rules! cf { () => { Q!("QWORD PTR [rsp + 45 * " NUMSIZE!() "+ 8]") } } +macro_rules! i { () => { Q!("QWORD PTR [rsp + 45 * " NUMSIZE!() "+ 16]") } } +macro_rules! res { () => { Q!("QWORD PTR [rsp + 45 * " NUMSIZE!() "+ 24]") } } + +// Total size to reserve on the stack (excluding local subroutines) + +macro_rules! NSPACE { () => { Q!("(46 * " NUMSIZE!() ")") } } + +// Syntactic variants to make x86_att forms easier to generate + +macro_rules! SCALAR { () => { Q!("(0 * " NUMSIZE!() ")") } } +macro_rules! BSCALAR { () => { Q!("(1 * " NUMSIZE!() ")") } } +macro_rules! TABENT { () => { Q!("(2 * " NUMSIZE!() ")") } } +macro_rules! BTABENT { () => { Q!("(6 * " NUMSIZE!() ")") } } +macro_rules! ACC { () => { Q!("(9 * " NUMSIZE!() ")") } } +macro_rules! TAB { () => { Q!("(13 * " NUMSIZE!() ")") } } + +// Sub-references used in local subroutines with local stack + +macro_rules! x_0 { + () => { + "rdi + 0" + }; +} +macro_rules! y_0 { () => { Q!("rdi + " NUMSIZE!()) } } +macro_rules! z_0 { () => { Q!("rdi + (2 * " NUMSIZE!() ")") } } +macro_rules! w_0 { () => { Q!("rdi + (3 * " NUMSIZE!() ")") } } + +macro_rules! 
x_1 { + () => { + "rsi + 0" + }; +} +macro_rules! y_1 { () => { Q!("rsi + " NUMSIZE!()) } } +macro_rules! z_1 { () => { Q!("rsi + (2 * " NUMSIZE!() ")") } } +macro_rules! w_1 { () => { Q!("rsi + (3 * " NUMSIZE!() ")") } } + +macro_rules! x_2 { + () => { + "rbp + 0" + }; +} +macro_rules! y_2 { () => { Q!("rbp + " NUMSIZE!()) } } +macro_rules! z_2 { () => { Q!("rbp + (2 * " NUMSIZE!() ")") } } +macro_rules! w_2 { () => { Q!("rbp + (3 * " NUMSIZE!() ")") } } + +macro_rules! t0 { () => { Q!("rsp + (0 * " NUMSIZE!() ")") } } +macro_rules! t1 { () => { Q!("rsp + (1 * " NUMSIZE!() ")") } } +macro_rules! t2 { () => { Q!("rsp + (2 * " NUMSIZE!() ")") } } +macro_rules! t3 { () => { Q!("rsp + (3 * " NUMSIZE!() ")") } } +macro_rules! t4 { () => { Q!("rsp + (4 * " NUMSIZE!() ")") } } +macro_rules! t5 { () => { Q!("rsp + (5 * " NUMSIZE!() ")") } } + +// Macro wrapping up the basic field multiplication, only trivially +// different from a pure function call to bignum_mul_p25519. + +macro_rules! mul_p25519 { + ($P0:expr, $P1:expr, $P2:expr) => { Q!( + "xor ecx, ecx;\n" + "mov rdx, [" $P2 "];\n" + "mulx r9, r8, [" $P1 "];\n" + "mulx r10, rax, [" $P1 "+ 0x8];\n" + "add r9, rax;\n" + "mulx r11, rax, [" $P1 "+ 0x10];\n" + "adc r10, rax;\n" + "mulx r12, rax, [" $P1 "+ 0x18];\n" + "adc r11, rax;\n" + "adc r12, rcx;\n" + "xor ecx, ecx;\n" + "mov rdx, [" $P2 "+ 0x8];\n" + "mulx rbx, rax, [" $P1 "];\n" + "adcx r9, rax;\n" + "adox r10, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x8];\n" + "adcx r10, rax;\n" + "adox r11, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x10];\n" + "adcx r11, rax;\n" + "adox r12, rbx;\n" + "mulx r13, rax, [" $P1 "+ 0x18];\n" + "adcx r12, rax;\n" + "adox r13, rcx;\n" + "adcx r13, rcx;\n" + "xor ecx, ecx;\n" + "mov rdx, [" $P2 "+ 0x10];\n" + "mulx rbx, rax, [" $P1 "];\n" + "adcx r10, rax;\n" + "adox r11, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x8];\n" + "adcx r11, rax;\n" + "adox r12, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x10];\n" + "adcx r12, rax;\n" + "adox r13, rbx;\n" + "mulx r14, 
rax, [" $P1 "+ 0x18];\n" + "adcx r13, rax;\n" + "adox r14, rcx;\n" + "adcx r14, rcx;\n" + "xor ecx, ecx;\n" + "mov rdx, [" $P2 "+ 0x18];\n" + "mulx rbx, rax, [" $P1 "];\n" + "adcx r11, rax;\n" + "adox r12, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x8];\n" + "adcx r12, rax;\n" + "adox r13, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x10];\n" + "adcx r13, rax;\n" + "adox r14, rbx;\n" + "mulx r15, rax, [" $P1 "+ 0x18];\n" + "adcx r14, rax;\n" + "adox r15, rcx;\n" + "adcx r15, rcx;\n" + "mov edx, 0x26;\n" + "xor ecx, ecx;\n" + "mulx rbx, rax, r12;\n" + "adcx r8, rax;\n" + "adox r9, rbx;\n" + "mulx rbx, rax, r13;\n" + "adcx r9, rax;\n" + "adox r10, rbx;\n" + "mulx rbx, rax, r14;\n" + "adcx r10, rax;\n" + "adox r11, rbx;\n" + "mulx r12, rax, r15;\n" + "adcx r11, rax;\n" + "adox r12, rcx;\n" + "adcx r12, rcx;\n" + "shld r12, r11, 0x1;\n" + "mov edx, 0x13;\n" + "inc r12;\n" + "bts r11, 63;\n" + "mulx rbx, rax, r12;\n" + "add r8, rax;\n" + "adc r9, rbx;\n" + "adc r10, rcx;\n" + "adc r11, rcx;\n" + "sbb rax, rax;\n" + "not rax;\n" + "and rax, rdx;\n" + "sub r8, rax;\n" + "sbb r9, rcx;\n" + "sbb r10, rcx;\n" + "sbb r11, rcx;\n" + "btr r11, 63;\n" + "mov [" $P0 "], r8;\n" + "mov [" $P0 "+ 0x8], r9;\n" + "mov [" $P0 "+ 0x10], r10;\n" + "mov [" $P0 "+ 0x18], r11" + )} +} + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. + +macro_rules! 
mul_4 { + ($P0:expr, $P1:expr, $P2:expr) => { Q!( + "xor ecx, ecx;\n" + "mov rdx, [" $P2 "];\n" + "mulx r9, r8, [" $P1 "];\n" + "mulx r10, rax, [" $P1 "+ 0x8];\n" + "add r9, rax;\n" + "mulx r11, rax, [" $P1 "+ 0x10];\n" + "adc r10, rax;\n" + "mulx r12, rax, [" $P1 "+ 0x18];\n" + "adc r11, rax;\n" + "adc r12, rcx;\n" + "xor ecx, ecx;\n" + "mov rdx, [" $P2 "+ 0x8];\n" + "mulx rbx, rax, [" $P1 "];\n" + "adcx r9, rax;\n" + "adox r10, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x8];\n" + "adcx r10, rax;\n" + "adox r11, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x10];\n" + "adcx r11, rax;\n" + "adox r12, rbx;\n" + "mulx r13, rax, [" $P1 "+ 0x18];\n" + "adcx r12, rax;\n" + "adox r13, rcx;\n" + "adcx r13, rcx;\n" + "xor ecx, ecx;\n" + "mov rdx, [" $P2 "+ 0x10];\n" + "mulx rbx, rax, [" $P1 "];\n" + "adcx r10, rax;\n" + "adox r11, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x8];\n" + "adcx r11, rax;\n" + "adox r12, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x10];\n" + "adcx r12, rax;\n" + "adox r13, rbx;\n" + "mulx r14, rax, [" $P1 "+ 0x18];\n" + "adcx r13, rax;\n" + "adox r14, rcx;\n" + "adcx r14, rcx;\n" + "xor ecx, ecx;\n" + "mov rdx, [" $P2 "+ 0x18];\n" + "mulx rbx, rax, [" $P1 "];\n" + "adcx r11, rax;\n" + "adox r12, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x8];\n" + "adcx r12, rax;\n" + "adox r13, rbx;\n" + "mulx rbx, rax, [" $P1 "+ 0x10];\n" + "adcx r13, rax;\n" + "adox r14, rbx;\n" + "mulx r15, rax, [" $P1 "+ 0x18];\n" + "adcx r14, rax;\n" + "adox r15, rcx;\n" + "adcx r15, rcx;\n" + "mov edx, 0x26;\n" + "xor ecx, ecx;\n" + "mulx rbx, rax, r12;\n" + "adcx r8, rax;\n" + "adox r9, rbx;\n" + "mulx rbx, rax, r13;\n" + "adcx r9, rax;\n" + "adox r10, rbx;\n" + "mulx rbx, rax, r14;\n" + "adcx r10, rax;\n" + "adox r11, rbx;\n" + "mulx r12, rax, r15;\n" + "adcx r11, rax;\n" + "adox r12, rcx;\n" + "adcx r12, rcx;\n" + "shld r12, r11, 0x1;\n" + "btr r11, 0x3f;\n" + "mov edx, 0x13;\n" + "imul rdx, r12;\n" + "add r8, rdx;\n" + "adc r9, rcx;\n" + "adc r10, rcx;\n" + "adc r11, rcx;\n" + "mov [" $P0 "], r8;\n" + 
"mov [" $P0 "+ 0x8], r9;\n" + "mov [" $P0 "+ 0x10], r10;\n" + "mov [" $P0 "+ 0x18], r11" + )} +} + +// Squaring just giving a result < 2 * p_25519, which is done by +// basically skipping the +1 in the quotient estimate and the final +// optional correction. + +macro_rules! sqr_4 { + ($P0:expr, $P1:expr) => { Q!( + "mov rdx, [" $P1 "];\n" + "mulx r15, r8, rdx;\n" + "mulx r10, r9, [" $P1 "+ 0x8];\n" + "mulx r12, r11, [" $P1 "+ 0x18];\n" + "mov rdx, [" $P1 "+ 0x10];\n" + "mulx r14, r13, [" $P1 "+ 0x18];\n" + "xor ebx, ebx;\n" + "mulx rcx, rax, [" $P1 "];\n" + "adcx r10, rax;\n" + "adox r11, rcx;\n" + "mulx rcx, rax, [" $P1 "+ 0x8];\n" + "adcx r11, rax;\n" + "adox r12, rcx;\n" + "mov rdx, [" $P1 "+ 0x18];\n" + "mulx rcx, rax, [" $P1 "+ 0x8];\n" + "adcx r12, rax;\n" + "adox r13, rcx;\n" + "adcx r13, rbx;\n" + "adox r14, rbx;\n" + "adc r14, rbx;\n" + "xor ebx, ebx;\n" + "adcx r9, r9;\n" + "adox r9, r15;\n" + "mov rdx, [" $P1 "+ 0x8];\n" + "mulx rdx, rax, rdx;\n" + "adcx r10, r10;\n" + "adox r10, rax;\n" + "adcx r11, r11;\n" + "adox r11, rdx;\n" + "mov rdx, [" $P1 "+ 0x10];\n" + "mulx rdx, rax, rdx;\n" + "adcx r12, r12;\n" + "adox r12, rax;\n" + "adcx r13, r13;\n" + "adox r13, rdx;\n" + "mov rdx, [" $P1 "+ 0x18];\n" + "mulx r15, rax, rdx;\n" + "adcx r14, r14;\n" + "adox r14, rax;\n" + "adcx r15, rbx;\n" + "adox r15, rbx;\n" + "mov edx, 0x26;\n" + "xor ebx, ebx;\n" + "mulx rcx, rax, r12;\n" + "adcx r8, rax;\n" + "adox r9, rcx;\n" + "mulx rcx, rax, r13;\n" + "adcx r9, rax;\n" + "adox r10, rcx;\n" + "mulx rcx, rax, r14;\n" + "adcx r10, rax;\n" + "adox r11, rcx;\n" + "mulx r12, rax, r15;\n" + "adcx r11, rax;\n" + "adox r12, rbx;\n" + "adcx r12, rbx;\n" + "shld r12, r11, 0x1;\n" + "btr r11, 0x3f;\n" + "mov edx, 0x13;\n" + "imul rdx, r12;\n" + "add r8, rdx;\n" + "adc r9, rbx;\n" + "adc r10, rbx;\n" + "adc r11, rbx;\n" + "mov [" $P0 "], r8;\n" + "mov [" $P0 "+ 0x8], r9;\n" + "mov [" $P0 "+ 0x10], r10;\n" + "mov [" $P0 "+ 0x18], r11" + )} +} + +// Modular subtraction with double 
modulus 2 * p_25519 = 2^256 - 38 + +macro_rules! sub_twice4 { + ($P0:expr, $P1:expr, $P2:expr) => { Q!( + "mov r8, [" $P1 "];\n" + "xor ebx, ebx;\n" + "sub r8, [" $P2 "];\n" + "mov r9, [" $P1 "+ 8];\n" + "sbb r9, [" $P2 "+ 8];\n" + "mov ecx, 38;\n" + "mov r10, [" $P1 "+ 16];\n" + "sbb r10, [" $P2 "+ 16];\n" + "mov rax, [" $P1 "+ 24];\n" + "sbb rax, [" $P2 "+ 24];\n" + "cmovnc rcx, rbx;\n" + "sub r8, rcx;\n" + "sbb r9, rbx;\n" + "sbb r10, rbx;\n" + "sbb rax, rbx;\n" + "mov [" $P0 "], r8;\n" + "mov [" $P0 "+ 8], r9;\n" + "mov [" $P0 "+ 16], r10;\n" + "mov [" $P0 "+ 24], rax" + )} +} + +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. + +macro_rules! add_twice4 { + ($P0:expr, $P1:expr, $P2:expr) => { Q!( + "mov r8, [" $P1 "];\n" + "xor ecx, ecx;\n" + "add r8, [" $P2 "];\n" + "mov r9, [" $P1 "+ 0x8];\n" + "adc r9, [" $P2 "+ 0x8];\n" + "mov r10, [" $P1 "+ 0x10];\n" + "adc r10, [" $P2 "+ 0x10];\n" + "mov r11, [" $P1 "+ 0x18];\n" + "adc r11, [" $P2 "+ 0x18];\n" + "mov eax, 38;\n" + "cmovnc rax, rcx;\n" + "add r8, rax;\n" + "adc r9, rcx;\n" + "adc r10, rcx;\n" + "adc r11, rcx;\n" + "mov [" $P0 "], r8;\n" + "mov [" $P0 "+ 0x8], r9;\n" + "mov [" $P0 "+ 0x10], r10;\n" + "mov [" $P0 "+ 0x18], r11" + )} +} + +macro_rules! 
double_twice4 { + ($P0:expr, $P1:expr) => { Q!( + "mov r8, [" $P1 "];\n" + "xor ecx, ecx;\n" + "add r8, r8;\n" + "mov r9, [" $P1 "+ 0x8];\n" + "adc r9, r9;\n" + "mov r10, [" $P1 "+ 0x10];\n" + "adc r10, r10;\n" + "mov r11, [" $P1 "+ 0x18];\n" + "adc r11, r11;\n" + "mov eax, 38;\n" + "cmovnc rax, rcx;\n" + "add r8, rax;\n" + "adc r9, rcx;\n" + "adc r10, rcx;\n" + "adc r11, rcx;\n" + "mov [" $P0 "], r8;\n" + "mov [" $P0 "+ 0x8], r9;\n" + "mov [" $P0 "+ 0x10], r10;\n" + "mov [" $P0 "+ 0x18], r11" + )} +} + +// Load the constant k_25519 = 2 * d_25519 using immediate operations + +macro_rules! load_k25519 { + ($P0:expr) => { Q!( + "mov rax, 0xebd69b9426b2f159;\n" + "mov [" $P0 "], rax;\n" + "mov rax, 0x00e0149a8283b156;\n" + "mov [" $P0 "+ 8], rax;\n" + "mov rax, 0x198e80f2eef3d130;\n" + "mov [" $P0 "+ 16], rax;\n" + "mov rax, 0x2406d9dc56dffce7;\n" + "mov [" $P0 "+ 24], rax" + )} +} + +/// Double scalar multiplication for edwards25519, fresh and base point +/// +/// Input scalar[4], point[8], bscalar[4]; output res[8] +/// +/// Given scalar = n, point = P and bscalar = m, returns in res +/// the point (X,Y) = n * P + m * B where B = (...,4/5) is +/// the standard basepoint for the edwards25519 (Ed25519) curve. +/// +/// Both 256-bit coordinates of the input point P are implicitly +/// reduced modulo 2^255-19 if they are not already in reduced form, +/// but the conventional usage is that they *are* already reduced. +/// The scalars can be arbitrary 256-bit numbers but may also be +/// considered as implicitly reduced modulo the group order. +pub(crate) fn edwards25519_scalarmuldouble( + res: &mut [u64; 8], + scalar: &[u64; 4], + point: &[u64; 8], + bscalar: &[u64; 4], +) { + // SAFETY: inline assembly. see [crate::low::inline_assembly_safety] for safety info. + unsafe { + core::arch::asm!( + + Q!(" endbr64 " ), + + // In this case the Windows form literally makes a subroutine call. + // This avoids hassle arising from keeping code and data together. 
+ + + + // Save registers, make room for temps, preserve input arguments. + + Q!(" push " "rbx"), + Q!(" push " "rbp"), + Q!(" push " "r12"), + Q!(" push " "r13"), + Q!(" push " "r14"), + Q!(" push " "r15"), + Q!(" sub " "rsp, " NSPACE!()), + + // Move the output pointer to a stable place + + Q!(" mov " res!() ", rdi"), + + // Copy scalars while recoding all 4-bit nybbles except the top + // one (bits 252..255) into signed 4-bit digits. This is essentially + // done just by adding the recoding constant 0x0888..888, after + // which all digits except the first have an implicit bias of -8, + // so 0 -> -8, 1 -> -7, ... 7 -> -1, 8 -> 0, 9 -> 1, ... 15 -> 7. + // (We could literally create 2s complement signed nybbles by + // XORing with the same constant 0x0888..888 afterwards, but it + // doesn't seem to make the end usage any simpler.) + // + // In order to ensure that the unrecoded top nybble (bits 252..255) + // does not become > 8 as a result of carries lower down from the + // recoding, we first (conceptually) subtract the group order iff + // the top digit of the scalar is > 2^63. In the implementation the + // reduction and recoding are combined by optionally using the + // modified recoding constant 0x0888...888 + (2^256 - group_order). 
+ + Q!(" mov " "r8, [rcx]"), + Q!(" mov " "r9, [rcx + 8]"), + Q!(" mov " "r10, [rcx + 16]"), + Q!(" mov " "r11, [rcx + 24]"), + Q!(" mov " "r12, 0xc7f56fb5a0d9e920"), + Q!(" mov " "r13, 0xe190b99370cba1d5"), + Q!(" mov " "r14, 0x8888888888888887"), + Q!(" mov " "r15, 0x8888888888888888"), + Q!(" mov " "rax, 0x8000000000000000"), + Q!(" mov " "rbx, 0x0888888888888888"), + Q!(" cmp " "rax, r11"), + Q!(" cmovnc " "r12, r15"), + Q!(" cmovnc " "r13, r15"), + Q!(" cmovnc " "r14, r15"), + Q!(" cmovnc " "r15, rbx"), + Q!(" add " "r8, r12"), + Q!(" adc " "r9, r13"), + Q!(" adc " "r10, r14"), + Q!(" adc " "r11, r15"), + Q!(" mov " "[rsp + " BSCALAR!() "], r8"), + Q!(" mov " "[rsp + " BSCALAR!() "+ 8], r9"), + Q!(" mov " "[rsp + " BSCALAR!() "+ 16], r10"), + Q!(" mov " "[rsp + " BSCALAR!() "+ 24], r11"), + + Q!(" mov " "r8, [rsi]"), + Q!(" mov " "r9, [rsi + 8]"), + Q!(" mov " "r10, [rsi + 16]"), + Q!(" mov " "r11, [rsi + 24]"), + Q!(" mov " "r12, 0xc7f56fb5a0d9e920"), + Q!(" mov " "r13, 0xe190b99370cba1d5"), + Q!(" mov " "r14, 0x8888888888888887"), + Q!(" mov " "r15, 0x8888888888888888"), + Q!(" mov " "rax, 0x8000000000000000"), + Q!(" mov " "rbx, 0x0888888888888888"), + Q!(" cmp " "rax, r11"), + Q!(" cmovnc " "r12, r15"), + Q!(" cmovnc " "r13, r15"), + Q!(" cmovnc " "r14, r15"), + Q!(" cmovnc " "r15, rbx"), + Q!(" add " "r8, r12"), + Q!(" adc " "r9, r13"), + Q!(" adc " "r10, r14"), + Q!(" adc " "r11, r15"), + Q!(" mov " "[rsp + " SCALAR!() "], r8"), + Q!(" mov " "[rsp + " SCALAR!() "+ 8], r9"), + Q!(" mov " "[rsp + " SCALAR!() "+ 16], r10"), + Q!(" mov " "[rsp + " SCALAR!() "+ 24], r11"), + + // Create table of multiples 1..8 of the general input point at "tab". + // Reduce the input coordinates x and y modulo 2^256 - 38 first, for the + // sake of definiteness; this is the reduction that will be maintained. + // We could slightly optimize the additions because we know the input + // point is affine (so Z = 1), but it doesn't seem worth the complication. 
+ + Q!(" mov " "eax, 38"), + Q!(" mov " "r8, [rdx]"), + Q!(" xor " "ebx, ebx"), + Q!(" mov " "r9, [rdx + 8]"), + Q!(" xor " "ecx, ecx"), + Q!(" mov " "r10, [rdx + 16]"), + Q!(" xor " "esi, esi"), + Q!(" mov " "r11, [rdx + 24]"), + Q!(" add " "rax, r8"), + Q!(" adc " "rbx, r9"), + Q!(" adc " "rcx, r10"), + Q!(" adc " "rsi, r11"), + Q!(" cmovnc " "rax, r8"), + Q!(" mov " "[rsp + " TAB!() "], rax"), + Q!(" cmovnc " "rbx, r9"), + Q!(" mov " "[rsp + " TAB!() "+ 8], rbx"), + Q!(" cmovnc " "rcx, r10"), + Q!(" mov " "[rsp + " TAB!() "+ 16], rcx"), + Q!(" cmovnc " "rsi, r11"), + Q!(" mov " "[rsp + " TAB!() "+ 24], rsi"), + + Q!(" mov " "eax, 38"), + Q!(" mov " "r8, [rdx + 32]"), + Q!(" xor " "ebx, ebx"), + Q!(" mov " "r9, [rdx + 40]"), + Q!(" xor " "ecx, ecx"), + Q!(" mov " "r10, [rdx + 48]"), + Q!(" xor " "esi, esi"), + Q!(" mov " "r11, [rdx + 56]"), + Q!(" add " "rax, r8"), + Q!(" adc " "rbx, r9"), + Q!(" adc " "rcx, r10"), + Q!(" adc " "rsi, r11"), + Q!(" cmovnc " "rax, r8"), + Q!(" mov " "[rsp + " TAB!() "+ 32], rax"), + Q!(" cmovnc " "rbx, r9"), + Q!(" mov " "[rsp + " TAB!() "+ 40], rbx"), + Q!(" cmovnc " "rcx, r10"), + Q!(" mov " "[rsp + " TAB!() "+ 48], rcx"), + Q!(" cmovnc " "rsi, r11"), + Q!(" mov " "[rsp + " TAB!() "+ 56], rsi"), + + Q!(" mov " "eax, 1"), + Q!(" mov " "[rsp + " TAB!() "+ 64], rax"), + Q!(" xor " "eax, eax"), + Q!(" mov " "[rsp + " TAB!() "+ 72], rax"), + Q!(" mov " "[rsp + " TAB!() "+ 80], rax"), + Q!(" mov " "[rsp + " TAB!() "+ 88], rax"), + + Q!(" lea " "rdi, [rsp + " TAB!() "+ 96]"), + Q!(" lea " "rsi, [rsp + " TAB!() "]"), + Q!(" lea " "rbp, [rsp + " TAB!() "+ 32]"), + mul_4!(x_0!(), x_1!(), x_2!()), + + // Multiple 2 + + Q!(" lea " "rdi, [rsp + " TAB!() "+ 1 * 128]"), + Q!(" lea " "rsi, [rsp + " TAB!() "]"), + Q!(" call " Label!("edwards25519_scalarmuldouble_epdouble", 2, After)), + + // Multiple 3 + + Q!(" lea " "rdi, [rsp + " TAB!() "+ 2 * 128]"), + Q!(" lea " "rsi, [rsp + " TAB!() "]"), + Q!(" lea " "rbp, [rsp + " TAB!() "+ 1 * 128]"), + 
Q!(" call " Label!("edwards25519_scalarmuldouble_epadd", 3, After)), + + // Multiple 4 + + Q!(" lea " "rdi, [rsp + " TAB!() "+ 3 * 128]"), + Q!(" lea " "rsi, [rsp + " TAB!() "+ 1 * 128]"), + Q!(" call " Label!("edwards25519_scalarmuldouble_epdouble", 2, After)), + + // Multiple 5 + + Q!(" lea " "rdi, [rsp + " TAB!() "+ 4 * 128]"), + Q!(" lea " "rsi, [rsp + " TAB!() "]"), + Q!(" lea " "rbp, [rsp + " TAB!() "+ 3 * 128]"), + Q!(" call " Label!("edwards25519_scalarmuldouble_epadd", 3, After)), + + // Multiple 6 + + Q!(" lea " "rdi, [rsp + " TAB!() "+ 5 * 128]"), + Q!(" lea " "rsi, [rsp + " TAB!() "+ 2 * 128]"), + Q!(" call " Label!("edwards25519_scalarmuldouble_epdouble", 2, After)), + + // Multiple 7 + + Q!(" lea " "rdi, [rsp + " TAB!() "+ 6 * 128]"), + Q!(" lea " "rsi, [rsp + " TAB!() "]"), + Q!(" lea " "rbp, [rsp + " TAB!() "+ 5 * 128]"), + Q!(" call " Label!("edwards25519_scalarmuldouble_epadd", 3, After)), + + // Multiple 8 + + Q!(" lea " "rdi, [rsp + " TAB!() "+ 7 * 128]"), + Q!(" lea " "rsi, [rsp + " TAB!() "+ 3 * 128]"), + Q!(" call " Label!("edwards25519_scalarmuldouble_epdouble", 2, After)), + + // Handle the initialization, starting the loop counter at i = 252 + // and initializing acc to the sum of the table entries for the + // top nybbles of the scalars (the ones with no implicit -8 bias). + + Q!(" mov " "rax, 252"), + Q!(" mov " i!() ", rax"), + + // Index for btable entry... 
+ + Q!(" mov " "rax, [rsp + " BSCALAR!() "+ 24]"), + Q!(" shr " "rax, 60"), + Q!(" mov " bf!() ", rax"), + + // ...and constant-time indexing based on that index + + Q!(" mov " "eax, 1"), + Q!(" xor " "ebx, ebx"), + Q!(" xor " "ecx, ecx"), + Q!(" xor " "edx, edx"), + Q!(" mov " "r8d, 1"), + Q!(" xor " "r9d, r9d"), + Q!(" xor " "r10d, r10d"), + Q!(" xor " "r11d, r11d"), + Q!(" xor " "r12d, r12d"), + Q!(" xor " "r13d, r13d"), + Q!(" xor " "r14d, r14d"), + Q!(" xor " "r15d, r15d"), + + Q!(" lea " "rbp, [rip + {edwards25519_scalarmuldouble_table}]"), + + Q!(" cmp " bf!() ", 1"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " bf!() ", 2"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + 
Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " bf!() ", 3"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " bf!() ", 4"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " bf!() ", 5"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + 
Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " bf!() ", 6"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " bf!() ", 7"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 
96"), + + Q!(" cmp " bf!() ", 8"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + + Q!(" mov " "[rsp + " BTABENT!() "], rax"), + Q!(" mov " "[rsp + " BTABENT!() "+ 8], rbx"), + Q!(" mov " "[rsp + " BTABENT!() "+ 16], rcx"), + Q!(" mov " "[rsp + " BTABENT!() "+ 24], rdx"), + Q!(" mov " "[rsp + " BTABENT!() "+ 32], r8"), + Q!(" mov " "[rsp + " BTABENT!() "+ 40], r9"), + Q!(" mov " "[rsp + " BTABENT!() "+ 48], r10"), + Q!(" mov " "[rsp + " BTABENT!() "+ 56], r11"), + Q!(" mov " "[rsp + " BTABENT!() "+ 64], r12"), + Q!(" mov " "[rsp + " BTABENT!() "+ 72], r13"), + Q!(" mov " "[rsp + " BTABENT!() "+ 80], r14"), + Q!(" mov " "[rsp + " BTABENT!() "+ 88], r15"), + + // Index for table entry... + + Q!(" mov " "rax, [rsp + " SCALAR!() "+ 24]"), + Q!(" shr " "rax, 60"), + Q!(" mov " bf!() ", rax"), + + // ...and constant-time indexing based on that index. + // Do the Y and Z fields first, to save on registers... 
+ + Q!(" mov " "eax, 1"), + Q!(" xor " "ebx, ebx"), + Q!(" xor " "ecx, ecx"), + Q!(" xor " "edx, edx"), + Q!(" mov " "r8d, 1"), + Q!(" xor " "r9d, r9d"), + Q!(" xor " "r10d, r10d"), + Q!(" xor " "r11d, r11d"), + + Q!(" lea " "rbp, [rsp + " TAB!() "+ 32]"), + + Q!(" cmp " bf!() ", 1"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 2"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 3"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 4"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, 
rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 5"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 6"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 7"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 
128"), + + Q!(" cmp " bf!() ", 8"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + + Q!(" mov " "[rsp + " TABENT!() "+ 32], rax"), + Q!(" mov " "[rsp + " TABENT!() "+ 40], rbx"), + Q!(" mov " "[rsp + " TABENT!() "+ 48], rcx"), + Q!(" mov " "[rsp + " TABENT!() "+ 56], rdx"), + Q!(" mov " "[rsp + " TABENT!() "+ 64], r8"), + Q!(" mov " "[rsp + " TABENT!() "+ 72], r9"), + Q!(" mov " "[rsp + " TABENT!() "+ 80], r10"), + Q!(" mov " "[rsp + " TABENT!() "+ 88], r11"), + + // ...followed by the X and W fields + + Q!(" lea " "rbp, [rsp + " TAB!() "]"), + + Q!(" xor " "eax, eax"), + Q!(" xor " "ebx, ebx"), + Q!(" xor " "ecx, ecx"), + Q!(" xor " "edx, edx"), + Q!(" xor " "r8d, r8d"), + Q!(" xor " "r9d, r9d"), + Q!(" xor " "r10d, r10d"), + Q!(" xor " "r11d, r11d"), + + Q!(" cmp " bf!() ", 1"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 96]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 104]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 112]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 120]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 2"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, 
rsi"), + Q!(" mov " "rsi, [rbp + 96]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 104]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 112]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 120]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 3"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 96]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 104]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 112]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 120]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 4"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 96]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 104]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 112]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 120]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 5"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 96]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 104]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 112]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 120]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 6"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, 
[rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 96]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 104]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 112]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 120]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 7"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 96]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 104]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 112]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 120]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 8"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 96]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 104]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 112]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 120]"), + Q!(" cmovz " "r11, rsi"), + + Q!(" mov " "[rsp + " TABENT!() "], rax"), + Q!(" mov " "[rsp + " TABENT!() "+ 8], rbx"), + Q!(" mov " "[rsp + " TABENT!() "+ 16], rcx"), + Q!(" mov " "[rsp + " TABENT!() "+ 24], rdx"), + Q!(" mov " "[rsp + " TABENT!() "+ 96], r8"), + Q!(" mov " "[rsp + " TABENT!() "+ 104], r9"), + Q!(" mov " "[rsp + " TABENT!() "+ 112], r10"), + Q!(" mov " "[rsp + " TABENT!() "+ 120], r11"), + + // Add those elements to initialize the accumulator for bit position 252 + + Q!(" lea " "rdi, [rsp + " ACC!() "]"), + Q!(" lea " 
"rsi, [rsp + " TABENT!() "]"), + Q!(" lea " "rbp, [rsp + " BTABENT!() "]"), + Q!(" call " Label!("edwards25519_scalarmuldouble_pepadd", 4, After)), + + // Main loop with acc = [scalar/2^i] * point + [bscalar/2^i] * basepoint + // Start with i = 252 for bits 248..251 and go down four at a time to 3..0 + + Q!(Label!("edwards25519_scalarmuldouble_loop", 5) ":"), + + Q!(" mov " "rax, " i!()), + Q!(" sub " "rax, 4"), + Q!(" mov " i!() ", rax"), + + // Double to acc' = 2 * acc + + Q!(" lea " "rdi, [rsp + " ACC!() "]"), + Q!(" lea " "rsi, [rsp + " ACC!() "]"), + Q!(" call " Label!("edwards25519_scalarmuldouble_pdouble", 6, After)), + + // Get btable entry, first getting the adjusted bitfield... + + Q!(" mov " "rax, " i!()), + Q!(" mov " "rcx, rax"), + Q!(" shr " "rax, 6"), + Q!(" mov " "rax, [rsp + 8 * rax + 32]"), + Q!(" shr " "rax, cl"), + Q!(" and " "rax, 15"), + + Q!(" sub " "rax, 8"), + Q!(" sbb " "rcx, rcx"), + Q!(" xor " "rax, rcx"), + Q!(" sub " "rax, rcx"), + Q!(" mov " cf!() ", rcx"), + Q!(" mov " bf!() ", rax"), + + // ... then doing constant-time lookup with the appropriate index... 
+ + Q!(" mov " "eax, 1"), + Q!(" xor " "ebx, ebx"), + Q!(" xor " "ecx, ecx"), + Q!(" xor " "edx, edx"), + Q!(" mov " "r8d, 1"), + Q!(" xor " "r9d, r9d"), + Q!(" xor " "r10d, r10d"), + Q!(" xor " "r11d, r11d"), + Q!(" xor " "r12d, r12d"), + Q!(" xor " "r13d, r13d"), + Q!(" xor " "r14d, r14d"), + Q!(" xor " "r15d, r15d"), + + Q!(" lea " "rbp, [rip + {edwards25519_scalarmuldouble_table}]"), + + Q!(" cmp " bf!() ", 1"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " bf!() ", 2"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " bf!() ", 3"), + Q!(" mov " "rsi, 
[rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " bf!() ", 4"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " bf!() ", 5"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" 
cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " bf!() ", 6"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " bf!() ", 7"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + Q!(" add " "rbp, 96"), + + Q!(" cmp " bf!() ", 8"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" 
mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" mov " "rsi, [rbp + 64]"), + Q!(" cmovz " "r12, rsi"), + Q!(" mov " "rsi, [rbp + 72]"), + Q!(" cmovz " "r13, rsi"), + Q!(" mov " "rsi, [rbp + 80]"), + Q!(" cmovz " "r14, rsi"), + Q!(" mov " "rsi, [rbp + 88]"), + Q!(" cmovz " "r15, rsi"), + + // ... then optionally negating before storing. The table entry + // is in precomputed form and we currently have + // + // [rdx;rcx;rbx;rax] = y - x + // [r11;r10;r9;r8] = x + y + // [r15;r14;r13;r12] = 2 * d * x * y + // + // Negation for Edwards curves is -(x,y) = (-x,y), which in this modified + // form amounts to swapping the first two fields and negating the third. + // The negation does not always fully reduce even mod 2^256-38 in the zero + // case, instead giving -0 = 2^256-38. But that is fine since the result is + // always fed to a multiplication inside the "pepadd" function below that + // handles any 256-bit input. 
+ + Q!(" mov " "rdi, " cf!()), + Q!(" test " "rdi, rdi"), + + Q!(" mov " "rsi, rax"), + Q!(" cmovnz " "rsi, r8"), + Q!(" cmovnz " "r8, rax"), + Q!(" mov " "[rsp + " BTABENT!() "], rsi"), + Q!(" mov " "[rsp + " BTABENT!() "+ 32], r8"), + + Q!(" mov " "rsi, rbx"), + Q!(" cmovnz " "rsi, r9"), + Q!(" cmovnz " "r9, rbx"), + Q!(" mov " "[rsp + " BTABENT!() "+ 8], rsi"), + Q!(" mov " "[rsp + " BTABENT!() "+ 40], r9"), + + Q!(" mov " "rsi, rcx"), + Q!(" cmovnz " "rsi, r10"), + Q!(" cmovnz " "r10, rcx"), + Q!(" mov " "[rsp + " BTABENT!() "+ 16], rsi"), + Q!(" mov " "[rsp + " BTABENT!() "+ 48], r10"), + + Q!(" mov " "rsi, rdx"), + Q!(" cmovnz " "rsi, r11"), + Q!(" cmovnz " "r11, rdx"), + Q!(" mov " "[rsp + " BTABENT!() "+ 24], rsi"), + Q!(" mov " "[rsp + " BTABENT!() "+ 56], r11"), + + Q!(" xor " "r12, rdi"), + Q!(" xor " "r13, rdi"), + Q!(" xor " "r14, rdi"), + Q!(" xor " "r15, rdi"), + Q!(" and " "rdi, 37"), + Q!(" sub " "r12, rdi"), + Q!(" sbb " "r13, 0"), + Q!(" sbb " "r14, 0"), + Q!(" sbb " "r15, 0"), + Q!(" mov " "[rsp + " BTABENT!() "+ 64], r12"), + Q!(" mov " "[rsp + " BTABENT!() "+ 72], r13"), + Q!(" mov " "[rsp + " BTABENT!() "+ 80], r14"), + Q!(" mov " "[rsp + " BTABENT!() "+ 88], r15"), + + // Get table entry, first getting the adjusted bitfield... 
+ + Q!(" mov " "rax, " i!()), + Q!(" mov " "rcx, rax"), + Q!(" shr " "rax, 6"), + Q!(" mov " "rax, [rsp + 8 * rax]"), + Q!(" shr " "rax, cl"), + Q!(" and " "rax, 15"), + + Q!(" sub " "rax, 8"), + Q!(" sbb " "rcx, rcx"), + Q!(" xor " "rax, rcx"), + Q!(" sub " "rax, rcx"), + Q!(" mov " cf!() ", rcx"), + Q!(" mov " bf!() ", rax"), + + // ...and constant-time indexing based on that index + // Do the Y and Z fields first, to save on registers + // and store them back (they don't need any modification) + + Q!(" mov " "eax, 1"), + Q!(" xor " "ebx, ebx"), + Q!(" xor " "ecx, ecx"), + Q!(" xor " "edx, edx"), + Q!(" mov " "r8d, 1"), + Q!(" xor " "r9d, r9d"), + Q!(" xor " "r10d, r10d"), + Q!(" xor " "r11d, r11d"), + + Q!(" lea " "rbp, [rsp + " TAB!() "+ 32]"), + + Q!(" cmp " bf!() ", 1"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 2"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 3"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 
16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 4"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 5"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 6"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 7"), + 
Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 8"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 32]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 40]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 48]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 56]"), + Q!(" cmovz " "r11, rsi"), + + Q!(" mov " "[rsp + " TABENT!() "+ 32], rax"), + Q!(" mov " "[rsp + " TABENT!() "+ 40], rbx"), + Q!(" mov " "[rsp + " TABENT!() "+ 48], rcx"), + Q!(" mov " "[rsp + " TABENT!() "+ 56], rdx"), + Q!(" mov " "[rsp + " TABENT!() "+ 64], r8"), + Q!(" mov " "[rsp + " TABENT!() "+ 72], r9"), + Q!(" mov " "[rsp + " TABENT!() "+ 80], r10"), + Q!(" mov " "[rsp + " TABENT!() "+ 88], r11"), + + // Now do the X and W fields... 
+ + Q!(" lea " "rbp, [rsp + " TAB!() "]"), + + Q!(" xor " "eax, eax"), + Q!(" xor " "ebx, ebx"), + Q!(" xor " "ecx, ecx"), + Q!(" xor " "edx, edx"), + Q!(" xor " "r8d, r8d"), + Q!(" xor " "r9d, r9d"), + Q!(" xor " "r10d, r10d"), + Q!(" xor " "r11d, r11d"), + + Q!(" cmp " bf!() ", 1"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 96]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 104]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 112]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 120]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 2"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 96]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 104]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 112]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 120]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 3"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 96]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 104]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 112]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 120]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 4"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" 
cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 96]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 104]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 112]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 120]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 5"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 96]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 104]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 112]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 120]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 6"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 96]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 104]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 112]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 120]"), + Q!(" cmovz " "r11, rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 7"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 96]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 104]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 112]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 120]"), + Q!(" cmovz " "r11, 
rsi"), + Q!(" add " "rbp, 128"), + + Q!(" cmp " bf!() ", 8"), + Q!(" mov " "rsi, [rbp]"), + Q!(" cmovz " "rax, rsi"), + Q!(" mov " "rsi, [rbp + 8]"), + Q!(" cmovz " "rbx, rsi"), + Q!(" mov " "rsi, [rbp + 16]"), + Q!(" cmovz " "rcx, rsi"), + Q!(" mov " "rsi, [rbp + 24]"), + Q!(" cmovz " "rdx, rsi"), + Q!(" mov " "rsi, [rbp + 96]"), + Q!(" cmovz " "r8, rsi"), + Q!(" mov " "rsi, [rbp + 104]"), + Q!(" cmovz " "r9, rsi"), + Q!(" mov " "rsi, [rbp + 112]"), + Q!(" cmovz " "r10, rsi"), + Q!(" mov " "rsi, [rbp + 120]"), + Q!(" cmovz " "r11, rsi"), + + // ... then optionally negate before storing the X and W fields. This + // time the table entry is extended-projective, and is here: + // + // [rdx;rcx;rbx;rax] = X + // [tabent+32] = Y + // [tabent+64] = Z + // [r11;r10;r9;r8] = W + // + // This time we just need to negate the X and the W fields. + // The crude way negation is done can result in values of X or W + // (when initially zero before negation) being exactly equal to + // 2^256-38, but the "pepadd" function handles that correctly. 
+ + Q!(" mov " "rdi, " cf!()), + + Q!(" xor " "rax, rdi"), + Q!(" xor " "rbx, rdi"), + Q!(" xor " "rcx, rdi"), + Q!(" xor " "rdx, rdi"), + + Q!(" xor " "r8, rdi"), + Q!(" xor " "r9, rdi"), + Q!(" xor " "r10, rdi"), + Q!(" xor " "r11, rdi"), + + Q!(" and " "rdi, 37"), + + Q!(" sub " "rax, rdi"), + Q!(" sbb " "rbx, 0"), + Q!(" sbb " "rcx, 0"), + Q!(" sbb " "rdx, 0"), + + Q!(" mov " "[rsp + " TABENT!() "], rax"), + Q!(" mov " "[rsp + " TABENT!() "+ 8], rbx"), + Q!(" mov " "[rsp + " TABENT!() "+ 16], rcx"), + Q!(" mov " "[rsp + " TABENT!() "+ 24], rdx"), + + Q!(" sub " "r8, rdi"), + Q!(" sbb " "r9, 0"), + Q!(" sbb " "r10, 0"), + Q!(" sbb " "r11, 0"), + + Q!(" mov " "[rsp + " TABENT!() "+ 96], r8"), + Q!(" mov " "[rsp + " TABENT!() "+ 104], r9"), + Q!(" mov " "[rsp + " TABENT!() "+ 112], r10"), + Q!(" mov " "[rsp + " TABENT!() "+ 120], r11"), + + // Double to acc' = 4 * acc + + Q!(" lea " "rdi, [rsp + " ACC!() "]"), + Q!(" lea " "rsi, [rsp + " ACC!() "]"), + Q!(" call " Label!("edwards25519_scalarmuldouble_pdouble", 6, After)), + + // Add tabent := tabent + btabent + + Q!(" lea " "rdi, [rsp + " TABENT!() "]"), + Q!(" lea " "rsi, [rsp + " TABENT!() "]"), + Q!(" lea " "rbp, [rsp + " BTABENT!() "]"), + Q!(" call " Label!("edwards25519_scalarmuldouble_pepadd", 4, After)), + + // Double to acc' = 8 * acc + + Q!(" lea " "rdi, [rsp + " ACC!() "]"), + Q!(" lea " "rsi, [rsp + " ACC!() "]"), + Q!(" call " Label!("edwards25519_scalarmuldouble_pdouble", 6, After)), + + // Double to acc' = 16 * acc + + Q!(" lea " "rdi, [rsp + " ACC!() "]"), + Q!(" lea " "rsi, [rsp + " ACC!() "]"), + Q!(" call " Label!("edwards25519_scalarmuldouble_epdouble", 2, After)), + + // Add table entry, acc := acc + tabent + + Q!(" lea " "rdi, [rsp + " ACC!() "]"), + Q!(" lea " "rsi, [rsp + " ACC!() "]"), + Q!(" lea " "rbp, [rsp + " TABENT!() "]"), + Q!(" call " Label!("edwards25519_scalarmuldouble_epadd", 3, After)), + + // Loop down + + Q!(" mov " "rax, " i!()), + Q!(" test " "rax, rax"), + Q!(" jnz " 
Label!("edwards25519_scalarmuldouble_loop", 5, Before)), + + // Prepare to call the modular inverse function to get tab = 1/z + + Q!(" lea " "rdi, [rsp + " TAB!() "]"), + Q!(" lea " "rsi, [rsp + " ACC!() "+ 64]"), + + // Inline copy of bignum_inv_p25519, identical except for stripping out + // the prologue and epilogue saving and restoring registers and making + // and reclaiming room on the stack. For more details and explanations see + // "x86/curve25519/bignum_inv_p25519.S". Note that the stack it uses for + // its own temporaries is 208 bytes, so it has no effect on variables + // that are needed in the rest of our computation here: res, tab and acc. + + Q!(" mov " "[rsp + 0xc0], rdi"), + Q!(" xor " "eax, eax"), + Q!(" lea " "rcx, [rax -0x13]"), + Q!(" not " "rax"), + Q!(" mov " "[rsp], rcx"), + Q!(" mov " "[rsp + 0x8], rax"), + Q!(" mov " "[rsp + 0x10], rax"), + Q!(" btr " "rax, 0x3f"), + Q!(" mov " "[rsp + 0x18], rax"), + Q!(" mov " "rdx, [rsi]"), + Q!(" mov " "rcx, [rsi + 0x8]"), + Q!(" mov " "r8, [rsi + 0x10]"), + Q!(" mov " "r9, [rsi + 0x18]"), + Q!(" mov " "eax, 0x1"), + Q!(" xor " "r10d, r10d"), + Q!(" bts " "r9, 0x3f"), + Q!(" adc " "rax, r10"), + Q!(" imul " "rax, rax, 0x13"), + Q!(" add " "rdx, rax"), + Q!(" adc " "rcx, r10"), + Q!(" adc " "r8, r10"), + Q!(" adc " "r9, r10"), + Q!(" mov " "eax, 0x13"), + Q!(" cmovb " "rax, r10"), + Q!(" sub " "rdx, rax"), + Q!(" sbb " "rcx, r10"), + Q!(" sbb " "r8, r10"), + Q!(" sbb " "r9, r10"), + Q!(" btr " "r9, 0x3f"), + Q!(" mov " "[rsp + 0x20], rdx"), + Q!(" mov " "[rsp + 0x28], rcx"), + Q!(" mov " "[rsp + 0x30], r8"), + Q!(" mov " "[rsp + 0x38], r9"), + Q!(" xor " "eax, eax"), + Q!(" mov " "[rsp + 0x40], rax"), + Q!(" mov " "[rsp + 0x48], rax"), + Q!(" mov " "[rsp + 0x50], rax"), + Q!(" mov " "[rsp + 0x58], rax"), + Q!(" movabs " "rax, 0xa0f99e2375022099"), + Q!(" mov " "[rsp + 0x60], rax"), + Q!(" movabs " "rax, 0xa8c68f3f1d132595"), + Q!(" mov " "[rsp + 0x68], rax"), + Q!(" movabs " "rax, 0x6c6c893805ac5242"), 
+ Q!(" mov " "[rsp + 0x70], rax"), + Q!(" movabs " "rax, 0x276508b241770615"), + Q!(" mov " "[rsp + 0x78], rax"), + Q!(" mov " "QWORD PTR [rsp + 0x90], 0xa"), + Q!(" mov " "QWORD PTR [rsp + 0x98], 0x1"), + Q!(" jmp " Label!("edwards25519_scalarmuldouble_midloop", 7, After)), + Q!(Label!("edwards25519_scalarmuldouble_inverseloop", 8) ":"), + Q!(" mov " "r9, r8"), + Q!(" sar " "r9, 0x3f"), + Q!(" xor " "r8, r9"), + Q!(" sub " "r8, r9"), + Q!(" mov " "r11, r10"), + Q!(" sar " "r11, 0x3f"), + Q!(" xor " "r10, r11"), + Q!(" sub " "r10, r11"), + Q!(" mov " "r13, r12"), + Q!(" sar " "r13, 0x3f"), + Q!(" xor " "r12, r13"), + Q!(" sub " "r12, r13"), + Q!(" mov " "r15, r14"), + Q!(" sar " "r15, 0x3f"), + Q!(" xor " "r14, r15"), + Q!(" sub " "r14, r15"), + Q!(" mov " "rax, r8"), + Q!(" and " "rax, r9"), + Q!(" mov " "rdi, r10"), + Q!(" and " "rdi, r11"), + Q!(" add " "rdi, rax"), + Q!(" mov " "[rsp + 0x80], rdi"), + Q!(" mov " "rax, r12"), + Q!(" and " "rax, r13"), + Q!(" mov " "rsi, r14"), + Q!(" and " "rsi, r15"), + Q!(" add " "rsi, rax"), + Q!(" mov " "[rsp + 0x88], rsi"), + Q!(" xor " "ebx, ebx"), + Q!(" mov " "rax, [rsp]"), + Q!(" xor " "rax, r9"), + Q!(" mul " "r8"), + Q!(" add " "rdi, rax"), + Q!(" adc " "rbx, rdx"), + Q!(" mov " "rax, [rsp + 0x20]"), + Q!(" xor " "rax, r11"), + Q!(" mul " "r10"), + Q!(" add " "rdi, rax"), + Q!(" adc " "rbx, rdx"), + Q!(" xor " "ebp, ebp"), + Q!(" mov " "rax, [rsp]"), + Q!(" xor " "rax, r13"), + Q!(" mul " "r12"), + Q!(" add " "rsi, rax"), + Q!(" adc " "rbp, rdx"), + Q!(" mov " "rax, [rsp + 0x20]"), + Q!(" xor " "rax, r15"), + Q!(" mul " "r14"), + Q!(" add " "rsi, rax"), + Q!(" adc " "rbp, rdx"), + Q!(" xor " "ecx, ecx"), + Q!(" mov " "rax, [rsp + 0x8]"), + Q!(" xor " "rax, r9"), + Q!(" mul " "r8"), + Q!(" add " "rbx, rax"), + Q!(" adc " "rcx, rdx"), + Q!(" mov " "rax, [rsp + 0x28]"), + Q!(" xor " "rax, r11"), + Q!(" mul " "r10"), + Q!(" add " "rbx, rax"), + Q!(" adc " "rcx, rdx"), + Q!(" shrd " "rdi, rbx, 0x3b"), + Q!(" mov " "[rsp], 
rdi"), + Q!(" xor " "edi, edi"), + Q!(" mov " "rax, [rsp + 0x8]"), + Q!(" xor " "rax, r13"), + Q!(" mul " "r12"), + Q!(" add " "rbp, rax"), + Q!(" adc " "rdi, rdx"), + Q!(" mov " "rax, [rsp + 0x28]"), + Q!(" xor " "rax, r15"), + Q!(" mul " "r14"), + Q!(" add " "rbp, rax"), + Q!(" adc " "rdi, rdx"), + Q!(" shrd " "rsi, rbp, 0x3b"), + Q!(" mov " "[rsp + 0x20], rsi"), + Q!(" xor " "esi, esi"), + Q!(" mov " "rax, [rsp + 0x10]"), + Q!(" xor " "rax, r9"), + Q!(" mul " "r8"), + Q!(" add " "rcx, rax"), + Q!(" adc " "rsi, rdx"), + Q!(" mov " "rax, [rsp + 0x30]"), + Q!(" xor " "rax, r11"), + Q!(" mul " "r10"), + Q!(" add " "rcx, rax"), + Q!(" adc " "rsi, rdx"), + Q!(" shrd " "rbx, rcx, 0x3b"), + Q!(" mov " "[rsp + 0x8], rbx"), + Q!(" xor " "ebx, ebx"), + Q!(" mov " "rax, [rsp + 0x10]"), + Q!(" xor " "rax, r13"), + Q!(" mul " "r12"), + Q!(" add " "rdi, rax"), + Q!(" adc " "rbx, rdx"), + Q!(" mov " "rax, [rsp + 0x30]"), + Q!(" xor " "rax, r15"), + Q!(" mul " "r14"), + Q!(" add " "rdi, rax"), + Q!(" adc " "rbx, rdx"), + Q!(" shrd " "rbp, rdi, 0x3b"), + Q!(" mov " "[rsp + 0x28], rbp"), + Q!(" mov " "rax, [rsp + 0x18]"), + Q!(" xor " "rax, r9"), + Q!(" mov " "rbp, rax"), + Q!(" sar " "rbp, 0x3f"), + Q!(" and " "rbp, r8"), + Q!(" neg " "rbp"), + Q!(" mul " "r8"), + Q!(" add " "rsi, rax"), + Q!(" adc " "rbp, rdx"), + Q!(" mov " "rax, [rsp + 0x38]"), + Q!(" xor " "rax, r11"), + Q!(" mov " "rdx, rax"), + Q!(" sar " "rdx, 0x3f"), + Q!(" and " "rdx, r10"), + Q!(" sub " "rbp, rdx"), + Q!(" mul " "r10"), + Q!(" add " "rsi, rax"), + Q!(" adc " "rbp, rdx"), + Q!(" shrd " "rcx, rsi, 0x3b"), + Q!(" mov " "[rsp + 0x10], rcx"), + Q!(" shrd " "rsi, rbp, 0x3b"), + Q!(" mov " "rax, [rsp + 0x18]"), + Q!(" mov " "[rsp + 0x18], rsi"), + Q!(" xor " "rax, r13"), + Q!(" mov " "rsi, rax"), + Q!(" sar " "rsi, 0x3f"), + Q!(" and " "rsi, r12"), + Q!(" neg " "rsi"), + Q!(" mul " "r12"), + Q!(" add " "rbx, rax"), + Q!(" adc " "rsi, rdx"), + Q!(" mov " "rax, [rsp + 0x38]"), + Q!(" xor " "rax, r15"), + Q!(" 
mov " "rdx, rax"), + Q!(" sar " "rdx, 0x3f"), + Q!(" and " "rdx, r14"), + Q!(" sub " "rsi, rdx"), + Q!(" mul " "r14"), + Q!(" add " "rbx, rax"), + Q!(" adc " "rsi, rdx"), + Q!(" shrd " "rdi, rbx, 0x3b"), + Q!(" mov " "[rsp + 0x30], rdi"), + Q!(" shrd " "rbx, rsi, 0x3b"), + Q!(" mov " "[rsp + 0x38], rbx"), + Q!(" mov " "rbx, [rsp + 0x80]"), + Q!(" mov " "rbp, [rsp + 0x88]"), + Q!(" xor " "ecx, ecx"), + Q!(" mov " "rax, [rsp + 0x40]"), + Q!(" xor " "rax, r9"), + Q!(" mul " "r8"), + Q!(" add " "rbx, rax"), + Q!(" adc " "rcx, rdx"), + Q!(" mov " "rax, [rsp + 0x60]"), + Q!(" xor " "rax, r11"), + Q!(" mul " "r10"), + Q!(" add " "rbx, rax"), + Q!(" adc " "rcx, rdx"), + Q!(" xor " "esi, esi"), + Q!(" mov " "rax, [rsp + 0x40]"), + Q!(" xor " "rax, r13"), + Q!(" mul " "r12"), + Q!(" mov " "[rsp + 0x40], rbx"), + Q!(" add " "rbp, rax"), + Q!(" adc " "rsi, rdx"), + Q!(" mov " "rax, [rsp + 0x60]"), + Q!(" xor " "rax, r15"), + Q!(" mul " "r14"), + Q!(" add " "rbp, rax"), + Q!(" adc " "rsi, rdx"), + Q!(" mov " "[rsp + 0x60], rbp"), + Q!(" xor " "ebx, ebx"), + Q!(" mov " "rax, [rsp + 0x48]"), + Q!(" xor " "rax, r9"), + Q!(" mul " "r8"), + Q!(" add " "rcx, rax"), + Q!(" adc " "rbx, rdx"), + Q!(" mov " "rax, [rsp + 0x68]"), + Q!(" xor " "rax, r11"), + Q!(" mul " "r10"), + Q!(" add " "rcx, rax"), + Q!(" adc " "rbx, rdx"), + Q!(" xor " "ebp, ebp"), + Q!(" mov " "rax, [rsp + 0x48]"), + Q!(" xor " "rax, r13"), + Q!(" mul " "r12"), + Q!(" mov " "[rsp + 0x48], rcx"), + Q!(" add " "rsi, rax"), + Q!(" adc " "rbp, rdx"), + Q!(" mov " "rax, [rsp + 0x68]"), + Q!(" xor " "rax, r15"), + Q!(" mul " "r14"), + Q!(" add " "rsi, rax"), + Q!(" adc " "rbp, rdx"), + Q!(" mov " "[rsp + 0x68], rsi"), + Q!(" xor " "ecx, ecx"), + Q!(" mov " "rax, [rsp + 0x50]"), + Q!(" xor " "rax, r9"), + Q!(" mul " "r8"), + Q!(" add " "rbx, rax"), + Q!(" adc " "rcx, rdx"), + Q!(" mov " "rax, [rsp + 0x70]"), + Q!(" xor " "rax, r11"), + Q!(" mul " "r10"), + Q!(" add " "rbx, rax"), + Q!(" adc " "rcx, rdx"), + Q!(" xor " "esi, 
esi"), + Q!(" mov " "rax, [rsp + 0x50]"), + Q!(" xor " "rax, r13"), + Q!(" mul " "r12"), + Q!(" mov " "[rsp + 0x50], rbx"), + Q!(" add " "rbp, rax"), + Q!(" adc " "rsi, rdx"), + Q!(" mov " "rax, [rsp + 0x70]"), + Q!(" xor " "rax, r15"), + Q!(" mul " "r14"), + Q!(" add " "rbp, rax"), + Q!(" adc " "rsi, rdx"), + Q!(" mov " "[rsp + 0x70], rbp"), + Q!(" mov " "rax, [rsp + 0x58]"), + Q!(" xor " "rax, r9"), + Q!(" mov " "rbx, r9"), + Q!(" and " "rbx, r8"), + Q!(" neg " "rbx"), + Q!(" mul " "r8"), + Q!(" add " "rcx, rax"), + Q!(" adc " "rbx, rdx"), + Q!(" mov " "rax, [rsp + 0x78]"), + Q!(" xor " "rax, r11"), + Q!(" mov " "rdx, r11"), + Q!(" and " "rdx, r10"), + Q!(" sub " "rbx, rdx"), + Q!(" mul " "r10"), + Q!(" add " "rcx, rax"), + Q!(" adc " "rdx, rbx"), + Q!(" mov " "rbx, rdx"), + Q!(" shld " "rdx, rcx, 0x1"), + Q!(" sar " "rbx, 0x3f"), + Q!(" add " "rdx, rbx"), + Q!(" mov " "eax, 0x13"), + Q!(" imul " "rdx"), + Q!(" mov " "r8, [rsp + 0x40]"), + Q!(" add " "r8, rax"), + Q!(" mov " "[rsp + 0x40], r8"), + Q!(" mov " "r8, [rsp + 0x48]"), + Q!(" adc " "r8, rdx"), + Q!(" mov " "[rsp + 0x48], r8"), + Q!(" mov " "r8, [rsp + 0x50]"), + Q!(" adc " "r8, rbx"), + Q!(" mov " "[rsp + 0x50], r8"), + Q!(" adc " "rcx, rbx"), + Q!(" shl " "rax, 0x3f"), + Q!(" add " "rcx, rax"), + Q!(" mov " "rax, [rsp + 0x58]"), + Q!(" mov " "[rsp + 0x58], rcx"), + Q!(" xor " "rax, r13"), + Q!(" mov " "rcx, r13"), + Q!(" and " "rcx, r12"), + Q!(" neg " "rcx"), + Q!(" mul " "r12"), + Q!(" add " "rsi, rax"), + Q!(" adc " "rcx, rdx"), + Q!(" mov " "rax, [rsp + 0x78]"), + Q!(" xor " "rax, r15"), + Q!(" mov " "rdx, r15"), + Q!(" and " "rdx, r14"), + Q!(" sub " "rcx, rdx"), + Q!(" mul " "r14"), + Q!(" add " "rsi, rax"), + Q!(" adc " "rdx, rcx"), + Q!(" mov " "rcx, rdx"), + Q!(" shld " "rdx, rsi, 0x1"), + Q!(" sar " "rcx, 0x3f"), + Q!(" mov " "eax, 0x13"), + Q!(" add " "rdx, rcx"), + Q!(" imul " "rdx"), + Q!(" mov " "r8, [rsp + 0x60]"), + Q!(" add " "r8, rax"), + Q!(" mov " "[rsp + 0x60], r8"), + Q!(" mov " 
"r8, [rsp + 0x68]"), + Q!(" adc " "r8, rdx"), + Q!(" mov " "[rsp + 0x68], r8"), + Q!(" mov " "r8, [rsp + 0x70]"), + Q!(" adc " "r8, rcx"), + Q!(" mov " "[rsp + 0x70], r8"), + Q!(" adc " "rsi, rcx"), + Q!(" shl " "rax, 0x3f"), + Q!(" add " "rsi, rax"), + Q!(" mov " "[rsp + 0x78], rsi"), + Q!(Label!("edwards25519_scalarmuldouble_midloop", 7) ":"), + Q!(" mov " "rsi, [rsp + 0x98]"), + Q!(" mov " "rdx, [rsp]"), + Q!(" mov " "rcx, [rsp + 0x20]"), + Q!(" mov " "rbx, rdx"), + Q!(" and " "rbx, 0xfffff"), + Q!(" movabs " "rax, 0xfffffe0000000000"), + Q!(" or " "rbx, rax"), + Q!(" and " "rcx, 0xfffff"), + Q!(" movabs " "rax, 0xc000000000000000"), + Q!(" or " "rcx, rax"), + Q!(" mov " "rax, 0xfffffffffffffffe"), + Q!(" xor " "ebp, ebp"), + Q!(" mov " "edx, 0x2"), + Q!(" mov " "rdi, rbx"), + Q!(" mov " "r8, rax"), + Q!(" test " "rsi, rsi"), + Q!(" cmovs " "r8, rbp"), + Q!(" test " "rcx, 0x1"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" 
xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + 
Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), 
+ Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" sar " "rcx, 1"), + Q!(" mov " "eax, 0x100000"), + Q!(" lea " "rdx, [rbx + rax]"), + Q!(" lea " "rdi, [rcx + rax]"), + Q!(" shl " "rdx, 0x16"), + Q!(" shl " "rdi, 0x16"), + Q!(" sar " "rdx, 0x2b"), + Q!(" sar " "rdi, 0x2b"), + Q!(" movabs " "rax, 
0x20000100000"), + Q!(" lea " "rbx, [rbx + rax]"), + Q!(" lea " "rcx, [rcx + rax]"), + Q!(" sar " "rbx, 0x2a"), + Q!(" sar " "rcx, 0x2a"), + Q!(" mov " "[rsp + 0xa0], rdx"), + Q!(" mov " "[rsp + 0xa8], rbx"), + Q!(" mov " "[rsp + 0xb0], rdi"), + Q!(" mov " "[rsp + 0xb8], rcx"), + Q!(" mov " "r12, [rsp]"), + Q!(" imul " "rdi, r12"), + Q!(" imul " "r12, rdx"), + Q!(" mov " "r13, [rsp + 0x20]"), + Q!(" imul " "rbx, r13"), + Q!(" imul " "r13, rcx"), + Q!(" add " "r12, rbx"), + Q!(" add " "r13, rdi"), + Q!(" sar " "r12, 0x14"), + Q!(" sar " "r13, 0x14"), + Q!(" mov " "rbx, r12"), + Q!(" and " "rbx, 0xfffff"), + Q!(" movabs " "rax, 0xfffffe0000000000"), + Q!(" or " "rbx, rax"), + Q!(" mov " "rcx, r13"), + Q!(" and " "rcx, 0xfffff"), + Q!(" movabs " "rax, 0xc000000000000000"), + Q!(" or " "rcx, rax"), + Q!(" mov " "rax, 0xfffffffffffffffe"), + Q!(" mov " "edx, 0x2"), + Q!(" mov " "rdi, rbx"), + Q!(" mov " "r8, rax"), + Q!(" test " "rsi, rsi"), + Q!(" cmovs " "r8, rbp"), + Q!(" test " "rcx, 0x1"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + 
Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" 
lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + 
Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" sar " "rcx, 1"), + Q!(" mov " "eax, 0x100000"), + Q!(" lea " "r8, [rbx + rax]"), + Q!(" lea " "r10, [rcx + rax]"), + Q!(" shl " "r8, 0x16"), + 
Q!(" shl " "r10, 0x16"), + Q!(" sar " "r8, 0x2b"), + Q!(" sar " "r10, 0x2b"), + Q!(" movabs " "rax, 0x20000100000"), + Q!(" lea " "r15, [rbx + rax]"), + Q!(" lea " "r11, [rcx + rax]"), + Q!(" sar " "r15, 0x2a"), + Q!(" sar " "r11, 0x2a"), + Q!(" mov " "rbx, r13"), + Q!(" mov " "rcx, r12"), + Q!(" imul " "r12, r8"), + Q!(" imul " "rbx, r15"), + Q!(" add " "r12, rbx"), + Q!(" imul " "r13, r11"), + Q!(" imul " "rcx, r10"), + Q!(" add " "r13, rcx"), + Q!(" sar " "r12, 0x14"), + Q!(" sar " "r13, 0x14"), + Q!(" mov " "rbx, r12"), + Q!(" and " "rbx, 0xfffff"), + Q!(" movabs " "rax, 0xfffffe0000000000"), + Q!(" or " "rbx, rax"), + Q!(" mov " "rcx, r13"), + Q!(" and " "rcx, 0xfffff"), + Q!(" movabs " "rax, 0xc000000000000000"), + Q!(" or " "rcx, rax"), + Q!(" mov " "rax, [rsp + 0xa0]"), + Q!(" imul " "rax, r8"), + Q!(" mov " "rdx, [rsp + 0xb0]"), + Q!(" imul " "rdx, r15"), + Q!(" imul " "r8, [rsp + 0xa8]"), + Q!(" imul " "r15, [rsp + 0xb8]"), + Q!(" add " "r15, r8"), + Q!(" lea " "r9, [rax + rdx]"), + Q!(" mov " "rax, [rsp + 0xa0]"), + Q!(" imul " "rax, r10"), + Q!(" mov " "rdx, [rsp + 0xb0]"), + Q!(" imul " "rdx, r11"), + Q!(" imul " "r10, [rsp + 0xa8]"), + Q!(" imul " "r11, [rsp + 0xb8]"), + Q!(" add " "r11, r10"), + Q!(" lea " "r13, [rax + rdx]"), + Q!(" mov " "rax, 0xfffffffffffffffe"), + Q!(" mov " "edx, 0x2"), + Q!(" mov " "rdi, rbx"), + Q!(" mov " "r8, rax"), + Q!(" test " "rsi, rsi"), + Q!(" cmovs " "r8, rbp"), + Q!(" test " "rcx, 0x1"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" 
sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + 
Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, 
rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" cmovs " "r8, rbp"), + Q!(" mov " "rdi, rbx"), + Q!(" test " "rcx, rdx"), + Q!(" cmove " "r8, rbp"), + Q!(" cmove " "rdi, rbp"), + Q!(" sar " "rcx, 1"), + Q!(" xor " "rdi, r8"), + Q!(" xor " "rsi, r8"), + Q!(" bt " "r8, 0x3f"), + Q!(" cmovb " "rbx, rcx"), + Q!(" mov " "r8, rax"), + Q!(" sub " "rsi, rax"), + Q!(" lea " "rcx, [rcx + rdi]"), + Q!(" sar " "rcx, 1"), + Q!(" mov " "eax, 
0x100000"), + Q!(" lea " "r8, [rbx + rax]"), + Q!(" lea " "r12, [rcx + rax]"), + Q!(" shl " "r8, 0x15"), + Q!(" shl " "r12, 0x15"), + Q!(" sar " "r8, 0x2b"), + Q!(" sar " "r12, 0x2b"), + Q!(" movabs " "rax, 0x20000100000"), + Q!(" lea " "r10, [rbx + rax]"), + Q!(" lea " "r14, [rcx + rax]"), + Q!(" sar " "r10, 0x2b"), + Q!(" sar " "r14, 0x2b"), + Q!(" mov " "rax, r9"), + Q!(" imul " "rax, r8"), + Q!(" mov " "rdx, r13"), + Q!(" imul " "rdx, r10"), + Q!(" imul " "r8, r15"), + Q!(" imul " "r10, r11"), + Q!(" add " "r10, r8"), + Q!(" lea " "r8, [rax + rdx]"), + Q!(" mov " "rax, r9"), + Q!(" imul " "rax, r12"), + Q!(" mov " "rdx, r13"), + Q!(" imul " "rdx, r14"), + Q!(" imul " "r12, r15"), + Q!(" imul " "r14, r11"), + Q!(" add " "r14, r12"), + Q!(" lea " "r12, [rax + rdx]"), + Q!(" mov " "[rsp + 0x98], rsi"), + Q!(" dec " "QWORD PTR [rsp + 0x90]"), + Q!(" jne " Label!("edwards25519_scalarmuldouble_inverseloop", 8, Before)), + Q!(" mov " "rax, [rsp]"), + Q!(" mov " "rcx, [rsp + 0x20]"), + Q!(" imul " "rax, r8"), + Q!(" imul " "rcx, r10"), + Q!(" add " "rax, rcx"), + Q!(" sar " "rax, 0x3f"), + Q!(" mov " "r9, r8"), + Q!(" sar " "r9, 0x3f"), + Q!(" xor " "r8, r9"), + Q!(" sub " "r8, r9"), + Q!(" xor " "r9, rax"), + Q!(" mov " "r11, r10"), + Q!(" sar " "r11, 0x3f"), + Q!(" xor " "r10, r11"), + Q!(" sub " "r10, r11"), + Q!(" xor " "r11, rax"), + Q!(" mov " "r13, r12"), + Q!(" sar " "r13, 0x3f"), + Q!(" xor " "r12, r13"), + Q!(" sub " "r12, r13"), + Q!(" xor " "r13, rax"), + Q!(" mov " "r15, r14"), + Q!(" sar " "r15, 0x3f"), + Q!(" xor " "r14, r15"), + Q!(" sub " "r14, r15"), + Q!(" xor " "r15, rax"), + Q!(" mov " "rax, r8"), + Q!(" and " "rax, r9"), + Q!(" mov " "r12, r10"), + Q!(" and " "r12, r11"), + Q!(" add " "r12, rax"), + Q!(" xor " "r13d, r13d"), + Q!(" mov " "rax, [rsp + 0x40]"), + Q!(" xor " "rax, r9"), + Q!(" mul " "r8"), + Q!(" add " "r12, rax"), + Q!(" adc " "r13, rdx"), + Q!(" mov " "rax, [rsp + 0x60]"), + Q!(" xor " "rax, r11"), + Q!(" mul " "r10"), + Q!(" add " 
"r12, rax"), + Q!(" adc " "r13, rdx"), + Q!(" xor " "r14d, r14d"), + Q!(" mov " "rax, [rsp + 0x48]"), + Q!(" xor " "rax, r9"), + Q!(" mul " "r8"), + Q!(" add " "r13, rax"), + Q!(" adc " "r14, rdx"), + Q!(" mov " "rax, [rsp + 0x68]"), + Q!(" xor " "rax, r11"), + Q!(" mul " "r10"), + Q!(" add " "r13, rax"), + Q!(" adc " "r14, rdx"), + Q!(" xor " "r15d, r15d"), + Q!(" mov " "rax, [rsp + 0x50]"), + Q!(" xor " "rax, r9"), + Q!(" mul " "r8"), + Q!(" add " "r14, rax"), + Q!(" adc " "r15, rdx"), + Q!(" mov " "rax, [rsp + 0x70]"), + Q!(" xor " "rax, r11"), + Q!(" mul " "r10"), + Q!(" add " "r14, rax"), + Q!(" adc " "r15, rdx"), + Q!(" mov " "rax, [rsp + 0x58]"), + Q!(" xor " "rax, r9"), + Q!(" and " "r9, r8"), + Q!(" neg " "r9"), + Q!(" mul " "r8"), + Q!(" add " "r15, rax"), + Q!(" adc " "r9, rdx"), + Q!(" mov " "rax, [rsp + 0x78]"), + Q!(" xor " "rax, r11"), + Q!(" mov " "rdx, r11"), + Q!(" and " "rdx, r10"), + Q!(" sub " "r9, rdx"), + Q!(" mul " "r10"), + Q!(" add " "r15, rax"), + Q!(" adc " "r9, rdx"), + Q!(" mov " "rax, r9"), + Q!(" shld " "rax, r15, 0x1"), + Q!(" sar " "r9, 0x3f"), + Q!(" mov " "ebx, 0x13"), + Q!(" lea " "rax, [rax + r9 + 0x1]"), + Q!(" imul " "rbx"), + Q!(" xor " "ebp, ebp"), + Q!(" add " "r12, rax"), + Q!(" adc " "r13, rdx"), + Q!(" adc " "r14, r9"), + Q!(" adc " "r15, r9"), + Q!(" shl " "rax, 0x3f"), + Q!(" add " "r15, rax"), + Q!(" cmovns " "rbx, rbp"), + Q!(" sub " "r12, rbx"), + Q!(" sbb " "r13, rbp"), + Q!(" sbb " "r14, rbp"), + Q!(" sbb " "r15, rbp"), + Q!(" btr " "r15, 0x3f"), + Q!(" mov " "rdi, [rsp + 0xc0]"), + Q!(" mov " "[rdi], r12"), + Q!(" mov " "[rdi + 0x8], r13"), + Q!(" mov " "[rdi + 0x10], r14"), + Q!(" mov " "[rdi + 0x18], r15"), + + // Store result + + Q!(" mov " "rdi, " res!()), + Q!(" lea " "rsi, [rsp + " ACC!() "]"), + Q!(" lea " "rbp, [rsp + " TAB!() "]"), + mul_p25519!(x_0!(), x_1!(), x_2!()), + + Q!(" mov " "rdi, " res!()), + Q!(" add " "rdi, 32"), + Q!(" lea " "rsi, [rsp + " ACC!() "+ 32]"), + Q!(" lea " "rbp, [rsp + " 
TAB!() "]"), + mul_p25519!(x_0!(), x_1!(), x_2!()), + + // Restore stack and registers + + Q!(" add " "rsp, " NSPACE!()), + + Q!(" pop " "r15"), + Q!(" pop " "r14"), + Q!(" pop " "r13"), + Q!(" pop " "r12"), + Q!(" pop " "rbp"), + Q!(" pop " "rbx"), + // proc hoisting in -> ret after edwards25519_scalarmuldouble_pepadd + Q!(" jmp " Label!("hoist_finish", 9, After)), + + // **************************************************************************** + // Localized versions of subroutines. + // These are close to the standalone functions "edwards25519_epdouble" etc., + // but are only maintaining reduction modulo 2^256 - 38, not 2^255 - 19. + // **************************************************************************** + + Q!(Label!("edwards25519_scalarmuldouble_epdouble", 2) ":"), + Q!(" sub " "rsp, (5 * " NUMSIZE!() ")"), + add_twice4!(t0!(), x_1!(), y_1!()), + sqr_4!(t1!(), z_1!()), + sqr_4!(t2!(), x_1!()), + sqr_4!(t3!(), y_1!()), + double_twice4!(t1!(), t1!()), + sqr_4!(t0!(), t0!()), + add_twice4!(t4!(), t2!(), t3!()), + sub_twice4!(t2!(), t2!(), t3!()), + add_twice4!(t3!(), t1!(), t2!()), + sub_twice4!(t1!(), t4!(), t0!()), + mul_4!(y_0!(), t2!(), t4!()), + mul_4!(z_0!(), t3!(), t2!()), + mul_4!(w_0!(), t1!(), t4!()), + mul_4!(x_0!(), t1!(), t3!()), + Q!(" add " "rsp, (5 * " NUMSIZE!() ")"), + Q!(" ret " ), + + Q!(Label!("edwards25519_scalarmuldouble_pdouble", 6) ":"), + Q!(" sub " "rsp, (5 * " NUMSIZE!() ")"), + add_twice4!(t0!(), x_1!(), y_1!()), + sqr_4!(t1!(), z_1!()), + sqr_4!(t2!(), x_1!()), + sqr_4!(t3!(), y_1!()), + double_twice4!(t1!(), t1!()), + sqr_4!(t0!(), t0!()), + add_twice4!(t4!(), t2!(), t3!()), + sub_twice4!(t2!(), t2!(), t3!()), + add_twice4!(t3!(), t1!(), t2!()), + sub_twice4!(t1!(), t4!(), t0!()), + mul_4!(y_0!(), t2!(), t4!()), + mul_4!(z_0!(), t3!(), t2!()), + mul_4!(x_0!(), t1!(), t3!()), + Q!(" add " "rsp, (5 * " NUMSIZE!() ")"), + Q!(" ret " ), + + Q!(Label!("edwards25519_scalarmuldouble_epadd", 3) ":"), + Q!(" sub " "rsp, (6 * " 
NUMSIZE!() ")"), + mul_4!(t0!(), w_1!(), w_2!()), + sub_twice4!(t1!(), y_1!(), x_1!()), + sub_twice4!(t2!(), y_2!(), x_2!()), + add_twice4!(t3!(), y_1!(), x_1!()), + add_twice4!(t4!(), y_2!(), x_2!()), + double_twice4!(t5!(), z_2!()), + mul_4!(t1!(), t1!(), t2!()), + mul_4!(t3!(), t3!(), t4!()), + load_k25519!(t2!()), + mul_4!(t2!(), t2!(), t0!()), + mul_4!(t4!(), z_1!(), t5!()), + sub_twice4!(t0!(), t3!(), t1!()), + add_twice4!(t5!(), t3!(), t1!()), + sub_twice4!(t1!(), t4!(), t2!()), + add_twice4!(t3!(), t4!(), t2!()), + mul_4!(w_0!(), t0!(), t5!()), + mul_4!(x_0!(), t0!(), t1!()), + mul_4!(y_0!(), t3!(), t5!()), + mul_4!(z_0!(), t1!(), t3!()), + Q!(" add " "rsp, (6 * " NUMSIZE!() ")"), + Q!(" ret " ), + + Q!(Label!("edwards25519_scalarmuldouble_pepadd", 4) ":"), + Q!(" sub " "rsp, (6 * " NUMSIZE!() ")"), + double_twice4!(t0!(), z_1!()), + sub_twice4!(t1!(), y_1!(), x_1!()), + add_twice4!(t2!(), y_1!(), x_1!()), + mul_4!(t3!(), w_1!(), z_2!()), + mul_4!(t1!(), t1!(), x_2!()), + mul_4!(t2!(), t2!(), y_2!()), + sub_twice4!(t4!(), t0!(), t3!()), + add_twice4!(t0!(), t0!(), t3!()), + sub_twice4!(t5!(), t2!(), t1!()), + add_twice4!(t1!(), t2!(), t1!()), + mul_4!(z_0!(), t4!(), t0!()), + mul_4!(x_0!(), t5!(), t4!()), + mul_4!(y_0!(), t0!(), t1!()), + mul_4!(w_0!(), t5!(), t1!()), + Q!(" add " "rsp, (6 * " NUMSIZE!() ")"), + Q!(" ret " ), + Q!(Label!("hoist_finish", 9) ":"), + inout("rdi") res.as_mut_ptr() => _, + inout("rsi") scalar.as_ptr() => _, + inout("rdx") point.as_ptr() => _, + inout("rcx") bscalar.as_ptr() => _, + edwards25519_scalarmuldouble_table = sym edwards25519_scalarmuldouble_table, + // clobbers + out("r10") _, + out("r11") _, + out("r12") _, + out("r13") _, + out("r14") _, + out("r15") _, + out("r8") _, + out("r9") _, + out("rax") _, + ) + }; +} + +// **************************************************************************** +// The precomputed data (all read-only). 
// Precomputed table of multiples of generator for edwards25519
// all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples.
//
// Layout: 8 entries (1*G through 8*G). Each entry is one triple of field
// elements, and each field element is 4 u64 limbs stored least-significant
// limb first, giving 12 limbs per entry and 96 limbs in total.
// The inline assembly refers to this table through its `sym` operand.

static edwards25519_scalarmuldouble_table: [u64; 96] = [
    // 1 * G
    0x9d103905d740913e,
    0xfd399f05d140beb3,
    0xa5c18434688f8a09,
    0x44fd2f9298f81267,
    0x2fbc93c6f58c3b85,
    0xcf932dc6fb8c0e19,
    0x270b4898643d42c2,
    0x07cf9d3a33d4ba65,
    0xabc91205877aaa68,
    0x26d9e823ccaac49e,
    0x5a1b7dcbdd43598c,
    0x6f117b689f0c65a8,
    // 2 * G
    0x8a99a56042b4d5a8,
    0x8f2b810c4e60acf6,
    0xe09e236bb16e37aa,
    0x6bb595a669c92555,
    0x9224e7fc933c71d7,
    0x9f469d967a0ff5b5,
    0x5aa69a65e1d60702,
    0x590c063fa87d2e2e,
    0x43faa8b3a59b7a5f,
    0x36c16bdd5d9acf78,
    0x500fa0840b3d6a31,
    0x701af5b13ea50b73,
    // 3 * G
    0x56611fe8a4fcd265,
    0x3bd353fde5c1ba7d,
    0x8131f31a214bd6bd,
    0x2ab91587555bda62,
    0xaf25b0a84cee9730,
    0x025a8430e8864b8a,
    0xc11b50029f016732,
    0x7a164e1b9a80f8f4,
    0x14ae933f0dd0d889,
    0x589423221c35da62,
    0xd170e5458cf2db4c,
    0x5a2826af12b9b4c6,
    // 4 * G
    0x95fe050a056818bf,
    0x327e89715660faa9,
    0xc3e8e3cd06a05073,
    0x27933f4c7445a49a,
    0x287351b98efc099f,
    0x6765c6f47dfd2538,
    0xca348d3dfb0a9265,
    0x680e910321e58727,
    0x5a13fbe9c476ff09,
    0x6e9e39457b5cc172,
    0x5ddbdcf9102b4494,
    0x7f9d0cbf63553e2b,
    // 5 * G
    0x7f9182c3a447d6ba,
    0xd50014d14b2729b7,
    0xe33cf11cb864a087,
    0x154a7e73eb1b55f3,
    0xa212bc4408a5bb33,
    0x8d5048c3c75eed02,
    0xdd1beb0c5abfec44,
    0x2945ccf146e206eb,
    0xbcbbdbf1812a8285,
    0x270e0807d0bdd1fc,
    0xb41b670b1bbda72d,
    0x43aabe696b3bb69a,
    // 6 * G
    0x499806b67b7d8ca4,
    0x575be28427d22739,
    0xbb085ce7204553b9,
    0x38b64c41ae417884,
    0x3a0ceeeb77157131,
    0x9b27158900c8af88,
    0x8065b668da59a736,
    0x51e57bb6a2cc38bd,
    0x85ac326702ea4b71,
    0xbe70e00341a1bb01,
    0x53e4a24b083bc144,
    0x10b8e91a9f0d61e3,
    // 7 * G
    0xba6f2c9aaa3221b1,
    0x6ca021533bba23a7,
    0x9dea764f92192c3a,
    0x1d6edd5d2e5317e0,
    0x6b1a5cd0944ea3bf,
    0x7470353ab39dc0d2,
    0x71b2528228542e49,
    0x461bea69283c927e,
    0xf1836dc801b8b3a2,
    0xb3035f47053ea49a,
    0x529c41ba5877adf3,
    0x7a9fbb1c6a0f90a7,
    // 8 * G
    0xe2a75dedf39234d9,
    0x963d7680e1b558f9,
    0x2c2741ac6e3c23fb,
    0x3a9024a1320e01c3,
    0x59b7596604dd3e8f,
    0x6cb30377e288702c,
    0xb1339c665ed9c323,
    0x0915e76061bce52f,
    0xe7c1f5d9c9a2911a,
    0xb8a371788bcca7d7,
    0x636412190eb62a32,
    0x26907c5c2ecc4e95,
];
edwards25519_scalarmuldouble; pub(crate) mod ghash; pub(crate) mod p256_montjadd; pub(crate) mod p256_montjdouble; diff --git a/graviola/src/mid/ed25519.rs b/graviola/src/mid/ed25519.rs new file mode 100644 index 000000000..5b793b9e1 --- /dev/null +++ b/graviola/src/mid/ed25519.rs @@ -0,0 +1,681 @@ +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +#![allow(dead_code)] // TODO(phlip9): remove + +use crate::error::Error; +use crate::low; +use crate::mid::sha2::Sha512Context; +use crate::mid::util; + +/// The little-endian encoded order of the base-point `B`, +/// `L := 2^252 + 27742317777372353535851937790883648493`. +const ORDER: [u64; 4] = [ + 0x5812631a5cf5d3ed, + 0x14def9dea2f79cd6, + 0x0000000000000000, + 0x1000000000000000, +]; + +pub(crate) struct SigningKey { + seed: [u8; 32], + s: UnreducedScalar, + vk_bytes: [u8; 32], + prefix: [u8; 32], +} + +impl SigningKey { + pub(crate) fn from_seed(seed: &[u8; 32]) -> Self { + let _entry = low::Entry::new_secret(); + low::ct::secret_slice(seed); + + // Step: rfc8032 5.1.5.1, 5.1.5.2 + // `h := SHA512(seed)` + // `s := ed25519-clamp(h[0..32])` + // `prefix := h[32..64]` + let mut h = { + let mut ctx = Sha512Context::new(); + ctx.update(seed); + ctx.finish() + }; + let (s, prefix) = util::u8x64_split_u8x32x2_mut(&mut h); + // Mangle the scalar: + // + // + s[0] &= 248; // 0b1111_1000 + s[31] &= 127; // 0b0111_1111 + s[31] |= 64; // 0b0100_0000 + + // Step: rfc8032 5.1.5.3, 5.1.5.4 + // Compute `[s]B` and compress to get the public key bytes + let s = UnreducedScalar(util::little_endian_to_u64x4(&s)); + let vk_bytes = VerifyingKey::from_unreduced_scalar(&s).into_bytes(); + + Self { + seed: *seed, + s, + vk_bytes, + prefix: *prefix, + } + } + + /// `PureEd25519` signing + #[allow(non_snake_case)] + pub(crate) fn sign(&self, msg: &[u8]) -> [u8; 64] { + let _entry = low::Entry::new_secret(); + + // Step: rfc8032 5.1.6.2 + // Compute the deterministic nonce + // `r := SHA-512(dom2(F, C) || prefix || PH(msg)) mod L` + 
let r: Scalar = { + let r = ed25519_digest(&self.prefix, msg, &[]); + Scalar::reduce_from_u8x64_le_bytes(&r) + }; + + // Step: rfc8032 5.1.6.3 + // Compute the commitment point `R := [r]B`. + let R: EdwardsPoint = r.mulbase(); + + // `sig := (R || S)` + // Start by writing `R` into the first 32 bytes of `sig`. + let mut sig = [0u8; 64]; + let (sig_R, sig_S) = util::u8x64_split_u8x32x2_mut(&mut sig); + R.compress_into(sig_R); + + // Step: rfc8032 5.1.6.4 + // Compute the challenge `k := SHA512(dom2(F, C) || R || A || PH(msg)) mod L` + let k: Scalar = { + let k = ed25519_digest(sig_R, &self.vk_bytes, msg); + Scalar::reduce_from_u8x64_le_bytes(&k) + }; + + // Step: rfc8032 5.1.6.5 + // Compute the proof `S := (k * s + r) mod L` + let S: Scalar = Scalar::madd_n25519(&k.0, &self.s.0, &r.0); + let S_bytes = S.to_le_bytes(); + *sig_S = S_bytes; + + low::ct::into_public(sig) + } + + // TODO(phlip9): unhack + fn verifying_key(&self) -> VerifyingKey { + VerifyingKey(self.vk_bytes) + } +} + +impl Drop for SigningKey { + fn drop(&mut self) { + low::zeroise(&mut self.seed); + low::zeroise(&mut self.s.0); + low::zeroise(&mut self.prefix); + } +} + +// TODO(phlip9): distinguish between unparsed and expanded verifying key +pub(crate) struct VerifyingKey([u8; 32]); + +impl VerifyingKey { + /// `PureEd25519` signature verification + #[allow(non_snake_case)] + pub(crate) fn verify(&self, sig: &[u8; 64], msg: &[u8]) -> Result<(), Error> { + let _entry = low::Entry::new_public(); + + // Step: rfc8032 5.1.7.1 + let A = EdwardsPoint::decompress_from(&self.0)?; + let (R_sig, S) = util::u8x64_split_u8x32x2_ref(sig); + + // S must be in the range [0, order) to prevent signature malleability. 
+ let S = Scalar::try_from_le_bytes(S).ok_or(Error::BadSignature)?; + + // Step: rfc8032 5.1.7.2 + // Compute the challenge `k := SHA512(dom2(F, C) || R || A || PH(msg))` + let k = { + let k = ed25519_digest(R_sig, &self.0, msg); + Scalar::reduce_from_u8x64_le_bytes(&k) + }; + + // Step: rfc8032 5.1.7.3 + // Compute `R := [S]B - [k]A`. + let A_neg = A.negate(); + let R_have = EdwardsPoint::scalarmuldouble(&k, &A_neg, &S).compress(); + + if R_sig == &R_have.0 { + Ok(()) + } else { + Err(Error::BadSignature) + } + } + + fn into_bytes(self) -> [u8; 32] { + self.0 + } + + fn from_unreduced_scalar(scalar: &UnreducedScalar) -> Self { + let point = scalar.mulbase().compress(); + Self(low::ct::into_public(point.0)) + } +} + +/// In ed25519 format, the curve point (x, y) is determined by the +/// y-coordinate and the sign of x. +/// +/// The first 255 bits of a `CompressedEdwardsY` represent the y-coordinate. +/// The high bit of the 32nd byte gives the sign of x. +struct CompressedEdwardsY([u8; 32]); + +/// Represents a point (x, y) on the edwards25519 curve. `[0..4]` is the +/// x-coordinate and `[4..8]` is the y-coordinate. +struct EdwardsPoint([u64; 8]); + +impl EdwardsPoint { + /// The base-point `B` of the edwards25519 curve. + #[cfg(test)] + const BASE_POINT: Self = EdwardsPoint([ + // X(B) + 0xc9562d608f25d51a, + 0x692cc7609525a7b2, + 0xc0a4e231fdd6dc5c, + 0x216936d3cd6e53fe, + // Y(B) + 0x6666666666666658, + 0x6666666666666666, + 0x6666666666666666, + 0x6666666666666666, + ]); + + /// The identity point `O` of the edwards25519 curve. + #[cfg(test)] + const IDENTITY: Self = EdwardsPoint([ + // X(O) + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + // Y(O) + 0x0000000000000001, + 0x0000000000000000, + 0x0000000000000000, + 0x0000000000000000, + ]); + + /// Compute `A := [scalar]Point + [bscalar]B`. 
+ fn scalarmuldouble(scalar: &Scalar, point: &Self, bscalar: &Scalar) -> Self { + let mut out = Self([0u64; 8]); + low::edwards25519_scalarmuldouble(&mut out.0, &scalar.0, &point.0, &bscalar.0); + out + } + + /// Compute `B := -A` for this curve point. + /// + /// Point negation for the twisted edwards curve when points are represented + /// in the extended coordinate system is simply: + /// -(X,Y,Z,T) = (-X,Y,Z,-T). + /// See "Twisted Edwards curves revisited": . + #[allow(non_snake_case)] + fn negate(mut self) -> Self { + let (X, _) = util::u64x8_split_u64x4x2_mut(&mut self.0); + let mut X_neg = [0u64; 4]; + low::bignum_neg_p25519(&mut X_neg, X); + *X = X_neg; + self + } + + /// Try to decompress a curve point from input bytes. + /// Returns `Err(Error::NotOnCurve)` if the input is not reduced, not + /// on the curve, or not canonically encoded. + fn decompress_from(compressed: &[u8; 32]) -> Result { + let mut point = Self([0u64; 8]); + if low::edwards25519_decode(&mut point.0, compressed) { + Ok(point) + } else { + Err(Error::NotOnCurve) + } + } + + /// Encode this edwards25519 point into a [`CompressedEdwardsY`]. + fn compress(&self) -> CompressedEdwardsY { + let mut out = CompressedEdwardsY([0u8; 32]); + self.compress_into(&mut out.0); + out + } + + /// Encode edwards25519 point into compressed form as a 256-bit number + /// + /// The output is a little-endian array of bytes corresponding to the + /// standard compressed encoding of a point as 2^255 * x_0 + y where + /// x_0 is the least significant bit of x. + /// See "https://datatracker.ietf.org/doc/html/rfc8032#section-5.1.2" + /// In this implementation, y is simply truncated to 255 bits, but if + /// it is reduced mod p_25519 as expected this does not affect values. + // + // Do this in Rust to avoid the pessimistic endian handling in the + // aarch64 s2n-bignum `edwards25519_encode` impl. 
+    //
+    // godbolt: (link lost in extraction)
+    fn compress_into(&self, out: &mut [u8; 32]) {
+        // Load lowest word of x coordinate; only its least significant bit
+        // ends up in the encoding.
+        let p = &self.0;
+        let xb = p[0];
+        // Load y coordinate as [y0, y1, y2, y3]
+        let y0 = p[4];
+        let y1 = p[5];
+        let y2 = p[6];
+        let y3 = p[7];
+
+        // Compute the encoded form, making the LSB of x the MSB of the encoding
+        // (the shift by 63 discards every other bit of `xb`).
+        let y3 = (y3 & 0x7fffffffffffffff) | (xb << 63);
+
+        out[0..8].copy_from_slice(&y0.to_le_bytes());
+        out[8..16].copy_from_slice(&y1.to_le_bytes());
+        out[16..24].copy_from_slice(&y2.to_le_bytes());
+        out[24..32].copy_from_slice(&y3.to_le_bytes());
+    }
+}
+
+/// An unreduced 256-bit little-endian scalar.
+// #[repr(transparent)]
+struct UnreducedScalar([u64; 4]);
+
+impl UnreducedScalar {
+    /// Interpret 32 bytes as a little-endian 256-bit number. No range check
+    /// is performed.
+    fn from_le_bytes(x: &[u8; 32]) -> Self {
+        Self(util::little_endian_to_u64x4(x))
+    }
+
+    /// Scalar multiply this scalar by the base-point: `[self]B`. Conveniently
+    /// also reduces the scalar before multiplying.
+    fn mulbase(&self) -> EdwardsPoint {
+        let mut point = EdwardsPoint([0u64; 8]);
+        low::edwards25519_scalarmulbase(&mut point.0, &self.0);
+        point
+    }
+
+    /// Reduce this 256-bit little-endian number modulo [`ORDER`].
+    #[cfg(test)]
+    fn reduce(&self) -> Scalar {
+        let mut s = Scalar([0u64; 4]);
+        low::bignum_mod_n25519(&mut s.0, &self.0);
+        s
+    }
+}
+
+/// A little-endian 256-bit scalar reduced modulo [`ORDER`].
+// #[repr(transparent)]
+#[cfg_attr(test, derive(Debug))]
+struct Scalar([u64; 4]);
+
+impl Scalar {
+    // TODO(phlip9): do I still need this?
+    // /// "Forget" that a scalar is reduced and cast it to an [`UnreducedScalar`].
+    // #[allow(unsafe_code)]
+    // fn as_unreduced(&self) -> &UnreducedScalar {
+    //     use std::mem::{align_of, size_of};
+    //     const _: [(); size_of::<Scalar>()] = [(); size_of::<UnreducedScalar>()];
+    //     const _: [(); align_of::<Scalar>()] = [(); align_of::<UnreducedScalar>()];
+    //     // Safety: both `Scalar` and `UnreducedScalar` are `#[repr(transparent)]`
+    //     // with identical size and alignment.
+    //     unsafe { &*(self as *const Self as *const UnreducedScalar) }
+    // }
+
+    /// Read a 256-bit little-endian number from the public input bytes,
+    /// additionally verifying that it's a valid `Scalar` reduced modulo
+    /// [`ORDER`].
+    ///
+    /// Returns `None` when the value is greater than or equal to [`ORDER`].
+    /// The comparison is variable-time, which is why this is only meant for
+    /// public inputs.
+    // TODO(phlip9): bench against `low::bignum_cmp_lt(&s, &ORDER)`?
+    fn try_from_le_bytes(x: &[u8; 32]) -> Option<Self> {
+        // Fast reject: the top 3 bits of the last (most significant) byte
+        // must be zero.
+        if x[31] & 224 /* 0b1110_0000 */ != 0 {
+            return None;
+        }
+
+        let s = util::little_endian_to_u64x4(x);
+        // Limbs are little-endian, so compare them most-significant first;
+        // `s` is valid only when it is strictly less than `ORDER`.
+        if s.iter().rev().lt(ORDER.iter().rev()) {
+            Some(Self(s))
+        } else {
+            None
+        }
+    }
+
+    /// Serialize this scalar as 32 little-endian bytes.
+    fn to_le_bytes(&self) -> [u8; 32] {
+        util::u64x4_to_little_endian(&self.0)
+    }
+
+    /// Reduce a 512-bit little-endian scalar modulo [`ORDER`].
+    fn reduce_from_u8x64_le_bytes(x: &[u8; 64]) -> Self {
+        let mut s = Self([0u64; 4]);
+        // `x` is already a reference, so it is passed through as-is.
+        low::bignum_mod_n25519(&mut s.0, &util::little_endian_to_u64x8(x));
+        s
+    }
+
+    /// Scalar multiply by the base-point: `[self]B`
+    fn mulbase(&self) -> EdwardsPoint {
+        // self.as_unreduced().mulbase()
+        let mut point = EdwardsPoint([0u64; 8]);
+        low::edwards25519_scalarmulbase(&mut point.0, &self.0);
+        point
+    }
+
+    /// Compute `z := (x * y + c)` modulo [`ORDER`].
+    fn madd_n25519(x: &[u64; 4], y: &[u64; 4], c: &[u64; 4]) -> Self {
+        let mut z = Self([0u64; 4]);
+        low::bignum_madd_n25519(&mut z.0, x, y, c);
+        z
+    }
+}
+
+/// This is `H(..) := SHA-512(dom2(phflag, ctx) || ..)` from rfc8032 5.1, with
+/// phflag=0 and ctx="".
+///
+/// The inputs are fed to the hash in the order `x1`, `x2`, `x3`; `x3` may be
+/// empty, in which case it is skipped.
+fn ed25519_digest(x1: &[u8], x2: &[u8], x3: &[u8]) -> [u8; 64] {
+    let mut h = Sha512Context::new();
+    h.update(x1);
+    h.update(x2);
+    if !x3.is_empty() {
+        h.update(x3);
+    }
+    h.finish()
+}
+
+#[cfg(test)]
+mod test {
+    use crate::{low::chacha20::ChaCha20, mid};
+
+    use super::*;
+
+    /// `p := 2^255 - 19`
+    const P_25519: [u64; 4] = [
+        0xffffffffffffffed,
+        0xffffffffffffffff,
+        0xffffffffffffffff,
+        0x7fffffffffffffff,
+    ];
+
+    /// Sign and verify the first two rfc8032 section 7.1 test vectors.
+    #[test]
+    fn test_rfc8032_test_vectors() {
+        // rfc8032 7.1.1
+        let seed = b"\x9d\x61\xb1\x9d\xef\xfd\x5a\x60\xba\x84\x4a\xf4\x92\xec\x2c\xc4\x44\x49\xc5\x69\x7b\x32\x69\x19\x70\x3b\xac\x03\x1c\xae\x7f\x60";
+        let sk = SigningKey::from_seed(&seed);
+        let msg = b"";
+        let sig = sk.sign(msg);
+        assert_eq!(&sk.vk_bytes, b"\xd7\x5a\x98\x01\x82\xb1\x0a\xb7\xd5\x4b\xfe\xd3\xc9\x64\x07\x3a\x0e\xe1\x72\xf3\xda\xa6\x23\x25\xaf\x02\x1a\x68\xf7\x07\x51\x1a");
+        assert_eq!(&sig, b"\xe5\x56\x43\x00\xc3\x60\xac\x72\x90\x86\xe2\xcc\x80\x6e\x82\x8a\x84\x87\x7f\x1e\xb8\xe5\xd9\x74\xd8\x73\xe0\x65\x22\x49\x01\x55\x5f\xb8\x82\x15\x90\xa3\x3b\xac\xc6\x1e\x39\x70\x1c\xf9\xb4\x6b\xd2\x5b\xf5\xf0\x59\x5b\xbe\x24\x65\x51\x41\x43\x8e\x7a\x10\x0b");
+        sk.verifying_key().verify(&sig, msg).unwrap();
+
+        // rfc8032 7.1.2
+        let seed = b"\x4c\xcd\x08\x9b\x28\xff\x96\xda\x9d\xb6\xc3\x46\xec\x11\x4e\x0f\x5b\x8a\x31\x9f\x35\xab\xa6\x24\xda\x8c\xf6\xed\x4f\xb8\xa6\xfb";
+        let sk = SigningKey::from_seed(&seed);
+        let msg = b"\x72";
+        let sig = sk.sign(msg);
+        assert_eq!(&sk.vk_bytes, b"\x3d\x40\x17\xc3\xe8\x43\x89\x5a\x92\xb7\x0a\xa7\x4d\x1b\x7e\xbc\x9c\x98\x2c\xcf\x2e\xc4\x96\x8c\xc0\xcd\x55\xf1\x2a\xf4\x66\x0c");
+        assert_eq!(&sig, b"\x92\xa0\x09\xa9\xf0\xd4\xca\xb8\x72\x0e\x82\x0b\x5f\x64\x25\x40\xa2\xb2\x7b\x54\x16\x50\x3f\x8f\xb3\x76\x22\x23\xeb\xdb\x69\xda\x08\x5a\xc1\xe4\x3e\x15\x99\x6e\x45\x8f\x36\x13\xd0\xf1\x1d\x8c\x38\x7b\x2e\xae\xb4\x30\x2a\xee\xb0\x0d\x29\x16\x12\xbb\x0c\x00");
+        sk.verifying_key().verify(&sig, msg).unwrap();
+    }
+
+    /// Reduction modulo `ORDER` and the `Scalar::try_from_le_bytes` range
+    /// check must agree on which values are already reduced.
+    #[test]
+    fn test_scalar_reduction() {
+        let zero = [0u64, 0, 0, 0];
+        let one = [1u64, 0, 0, 0];
+
+        assert_eq!(zero, UnreducedScalar(zero).reduce().0);
+        assert_eq!(one, UnreducedScalar(one).reduce().0);
+        assert_eq!(zero, UnreducedScalar(ORDER).reduce().0);
+
+        let order_m1 = [ORDER[0] - 1, ORDER[1], ORDER[2], ORDER[3]];
+        let order_p1 = [ORDER[0] + 1, ORDER[1], ORDER[2], ORDER[3]];
+        assert_eq!(order_m1, UnreducedScalar(order_m1).reduce().0);
+        assert_eq!(one, UnreducedScalar(order_p1).reduce().0);
+
+        #[track_caller]
+        fn scalar_ok(x1: &[u64; 4]) {
+            let x1_bytes = util::u64x4_to_little_endian(x1);
+            let x2 = Scalar::try_from_le_bytes(&x1_bytes).map(|x| x.0);
+            assert_eq!(x2, Some(*x1));
+        }
+        #[track_caller]
+        fn scalar_err(x1: &[u64; 4]) {
+            let x1_bytes = util::u64x4_to_little_endian(x1);
+            let x2 = Scalar::try_from_le_bytes(&x1_bytes).map(|x| x.0);
+            assert_eq!(x2, None);
+        }
+
+        scalar_ok(&zero);
+        scalar_ok(&one);
+        scalar_ok(&order_m1);
+
+        scalar_err(&ORDER);
+        scalar_err(&order_p1);
+        scalar_err(&[ORDER[0] - 1, ORDER[1], ORDER[2], ORDER[3] + 1]);
+
+        let mut rng = TestRng::new(202505191702);
+        for _ in 0..100 {
+            let x_bytes = rng.next::<32>();
+            let x = UnreducedScalar::from_le_bytes(&x_bytes);
+
+            // [u8; 32] reduction
+            let x_reduced = x.reduce();
+            let x_reduced_bytes = x_reduced.to_le_bytes();
+            assert_eq!(
+                x_reduced.0,
+                Scalar::try_from_le_bytes(&x_reduced_bytes).unwrap().0,
+            );
+            if x.0 != x_reduced.0 {
+                let res = Scalar::try_from_le_bytes(&x_bytes).map(|x| x.0);
+                assert_eq!(res, None);
+            }
+
+            // [u8; 64] reduction
+            let mut x_bytes_64 = [0u8; 64];
+            x_bytes_64[0..32].copy_from_slice(&x_bytes);
+            assert_eq!(
+                Scalar::reduce_from_u8x64_le_bytes(&x_bytes_64).0,
+                x_reduced.0,
+            );
+        }
+    }
+
+    /// `bignum_neg_p25519` must be an involution on reduced inputs and agree
+    /// with `0 - x mod p25519` computed via `bignum_modsub`.
+    #[test]
+    fn test_neg_p25519() {
+        fn is_reduced_mod_p25519(x: &[u64; 4]) -> bool {
+            low::bignum_cmp_lt(x, &P_25519) > 0
+        }
+        fn neg_p25519(x: &[u64; 4]) -> [u64; 4] {
+            let mut z = [0u64; 4];
+            low::bignum_neg_p25519(&mut z, x);
+            z
+        }
+        fn neg_p25519_alt(x: &[u64; 4]) -> [u64; 4] {
+            let mut z = [0u64; 4];
+            low::bignum_modsub(&mut z, &[0; 4], x, &P_25519);
+            z
+        }
+
+        let zero = [0u64; 4];
+        let one = [1u64, 0, 0, 0];
+        let p25519_m1 = [P_25519[0] - 1, P_25519[1], P_25519[2], P_25519[3]];
+
+        // -0 := 0 mod p25519
+        assert_eq!(neg_p25519(&zero), zero);
+        // (-1) := (p25519 - 1) mod p25519
+        assert_eq!(neg_p25519(&one), p25519_m1);
+        // -(p25519 - 1) := 1 mod p25519
+        assert_eq!(neg_p25519(&p25519_m1), one);
+
+        let mut rng = TestRng::new(202505192149);
+        for _ in 0..100 {
+            let x_bytes = rng.next::<32>();
+            let x = util::little_endian_to_u64x4(&x_bytes);
+            if is_reduced_mod_p25519(&x) {
+                // x := -(-x) mod p25519
+                assert_eq!(neg_p25519(&neg_p25519(&x)), x);
+                assert_eq!(neg_p25519(&x), neg_p25519_alt(&x));
+            }
+        }
+    }
+
+    /// `Scalar::madd_n25519` must match multiply, reduce and add done with the
+    /// generic bignum primitives. All arithmetic here is modulo `ORDER`
+    /// (n25519), not the field prime p25519.
+    #[test]
+    fn scalar_madd_n25519() {
+        const ZERO: [u64; 4] = [0u64; 4];
+        const ONE: [u64; 4] = [1u64, 0, 0, 0];
+
+        fn madd_n25519_alt(x: &[u64; 4], y: &[u64; 4], c: &[u64; 4]) -> [u64; 4] {
+            let mut xy = [0u64; 8];
+            low::bignum_mul(&mut xy, x, y);
+            let mut xy_reduced = [0u64; 4];
+            low::bignum_mod_n25519(&mut xy_reduced, &xy);
+            let mut c_reduced = [0u64; 4];
+            low::bignum_mod_n25519(&mut c_reduced, c);
+            let mut z = [0u64; 4];
+            low::bignum_modadd(&mut z, &xy_reduced, &c_reduced, &ORDER);
+            z
+        }
+
+        fn assert_basic_identities(x: &[u64; 4]) {
+            // x := (x * 1 + 0) mod n25519
+            // x := (1 * x + 0) mod n25519
+            assert_eq!(
+                Scalar::madd_n25519(x, &ONE, &ZERO).0,
+                UnreducedScalar(*x).reduce().0,
+            );
+            assert_eq!(
+                Scalar::madd_n25519(&ONE, x, &ZERO).0,
+                UnreducedScalar(*x).reduce().0,
+            );
+
+            // 0 := (x * 0 + 0) mod n25519
+            // 0 := (0 * x + 0) mod n25519
+            assert_eq!(Scalar::madd_n25519(x, &ZERO, &ZERO).0, ZERO);
+            assert_eq!(Scalar::madd_n25519(&ZERO, x, &ZERO).0, ZERO);
+
+            // x := (x * 0 + x) mod n25519
+            // x := (0 * x + x) mod n25519
+            assert_eq!(
+                Scalar::madd_n25519(x, &ZERO, x).0,
+                UnreducedScalar(*x).reduce().0,
+            );
+            assert_eq!(
+                Scalar::madd_n25519(&ZERO, x, x).0,
+                UnreducedScalar(*x).reduce().0,
+            );
+        }
+
+        assert_eq!(ZERO, Scalar::madd_n25519(&ZERO, &ZERO, &ZERO).0);
+        assert_eq!(ONE, Scalar::madd_n25519(&ZERO, &ZERO, &ONE).0);
+
+        assert_basic_identities(&ZERO);
+        assert_basic_identities(&ONE);
+
+        let mut rng = TestRng::new(202505192230);
+        for _ in 0..100 {
+            let x = UnreducedScalar::from_le_bytes(&rng.next::<32>()).0;
+            let y = UnreducedScalar::from_le_bytes(&rng.next::<32>()).0;
+            let c = UnreducedScalar::from_le_bytes(&rng.next::<32>()).0;
+
+            assert_basic_identities(&x);
+            assert_eq!(
+                Scalar::madd_n25519(&x, &y, &c).0,
+                madd_n25519_alt(&x, &y, &c)
+            );
+        }
+    }
+
+    /// Cross-check `mulbase` against `scalarmuldouble` on the base point.
+    #[test]
+    fn test_scalarmul() {
+        const ONE: [u64; 4] = [1u64, 0, 0, 0];
+
+        // O := [0]B
+        assert_eq!(EdwardsPoint::IDENTITY.0, Scalar([0; 4]).mulbase().0);
+        assert_eq!(
+            EdwardsPoint::IDENTITY.0,
+            EdwardsPoint::scalarmuldouble(
+                &Scalar([0; 4]),
+                &EdwardsPoint::BASE_POINT,
+                &Scalar([0; 4]),
+            )
+            .0
+        );
+        assert_eq!(
+            EdwardsPoint::IDENTITY.0,
+            EdwardsPoint::scalarmuldouble(
+                &Scalar([0x69; 4]),
+                &EdwardsPoint::IDENTITY,
+                &Scalar([0; 4]),
+            )
+            .0
+        );
+
+        // B := [1]B
+        assert_eq!(EdwardsPoint::BASE_POINT.0, Scalar([1, 0, 0, 0]).mulbase().0);
+
+        let mut rng = TestRng::new(202505192246);
+        for _ in 0..100 {
+            let bscalar = UnreducedScalar::from_le_bytes(&rng.next::<32>()).reduce();
+
+            // [bscalar]B := [bscalar]B + [0]B
+            assert_eq!(
+                bscalar.mulbase().0,
+                EdwardsPoint::scalarmuldouble(
+                    &bscalar,
+                    &EdwardsPoint::BASE_POINT,
+                    &Scalar([0; 4]),
+                ).0,
+            );
+
+            let scalar = UnreducedScalar::from_le_bytes(&rng.next::<32>()).reduce();
+            let sum = Scalar::madd_n25519(&scalar.0, &ONE, &bscalar.0);
+
+            // [scalar + bscalar]B := [scalar]B + [bscalar]B
+            assert_eq!(
+                sum.mulbase().0,
+                EdwardsPoint::scalarmuldouble(&scalar, &EdwardsPoint::BASE_POINT, &bscalar).0,
+            );
+        }
+    }
+
+    /// Known encodings decode to the expected points, and compress followed
+    /// by decompress round-trips for random multiples of the base point.
+    #[test]
+    fn test_point_compression() {
+        assert_eq!(
+            EdwardsPoint::IDENTITY.0,
+            EdwardsPoint::decompress_from(b"\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00").unwrap().0,
+        );
+        assert_eq!(
+            EdwardsPoint::BASE_POINT.0,
+            EdwardsPoint::decompress_from(b"\x58\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66").unwrap().0,
+        );
+
+        let mut rng = TestRng::new(202505192346);
+        for _ in 0..100 {
+            let scalar = UnreducedScalar::from_le_bytes(&rng.next::<32>()).reduce();
+            let point = scalar.mulbase();
+            let compressed = point.compress();
+            let decompressed = EdwardsPoint::decompress_from(&compressed.0).unwrap();
+            assert_eq!(point.0, decompressed.0);
+        }
+    }
+
+    /// Deterministic test-only byte generator: a ChaCha20 keystream keyed by
+    /// the SHA-256 digest of a `u64` seed.
+    struct TestRng {
+        chacha: ChaCha20,
+    }
+
+    impl TestRng {
+        fn new(seed: u64) -> Self {
+            let seed = sha256_digest(&seed.to_le_bytes());
+            let nonce = [0; 16];
+            let chacha = ChaCha20::new(&seed, &nonce);
+            Self { chacha }
+        }
+
+        /// Return the next `N` bytes, produced by running the cipher over a
+        /// zeroed buffer.
+        fn next<const N: usize>(&mut self) -> [u8; N] {
+            let mut out = [0u8; N];
+            self.chacha.cipher(&mut out);
+            out
+        }
+    }
+
+    fn sha256_digest(x: &[u8]) -> [u8; 32] {
+        let mut ctx = mid::sha2::Sha256Context::new();
+        ctx.update(x);
+        ctx.finish()
+    }
+}
diff --git a/graviola/src/mid/mod.rs b/graviola/src/mid/mod.rs
index fccb7b2b7..555c7de35 100644
--- a/graviola/src/mid/mod.rs
+++ b/graviola/src/mid/mod.rs
@@ -5,6 +5,7 @@
 
 pub(super) mod aes_gcm;
 pub(super) mod chacha20poly1305;
+mod ed25519;
 pub(super) mod p256;
 pub(super) mod p384;
 pub(super) mod rng;
diff --git a/graviola/src/mid/util.rs b/graviola/src/mid/util.rs
index bfcfdd5dc..e110b592c 100644
--- a/graviola/src/mid/util.rs
+++ b/graviola/src/mid/util.rs
@@ -1,6 +1,8 @@
 // Written for Graviola by Joe Birr-Pixton, 2024.
 // SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0
 
+#![allow(dead_code)] // TODO(phlip9): remove
+
 // Once const generics is completed this should be able to be
 // done better that way.
 
@@ -35,6 +37,12 @@ little_endian!(
     little_endian_slice_to_u64x4,
     u64x4_to_little_endian
 );
+little_endian!(
+    [u64; 8],
+    little_endian_to_u64x8,
+    little_endian_slice_to_u64x8,
+    u64x8_to_little_endian
+);
 
 macro_rules! big_endian {
     ([u64; $N:literal], $fn_array_to:ident, $fn_slice_to:ident, $fn_slice_any_size_to:ident, $fn_to_bytes:ident) => {
@@ -109,3 +117,61 @@ big_endian!(
     big_endian_slice_any_size_to_u64x6,
     u64x6_to_big_endian
 );
+
+// TODO(phlip9): MSRV 1.77 would allow us to use `split_first_chunk` and
+// `split_last_chunk` to implement these helpers.
+
+/// Split a 64-byte array into shared references to its low (`x[0..32]`) and
+/// high (`x[32..64]`) 32-byte halves, without copying.
+#[allow(unsafe_code)]
+pub(crate) const fn u8x64_split_u8x32x2_ref(x: &[u8; 64]) -> (&[u8; 32], &[u8; 32]) {
+    let ptr: *const u8 = x.as_ptr();
+    // SAFETY: we're splitting `x` into two non-overlapping 32B refs.
+    // 1. Alignment: `x`, `x1`, and `x2` all have alignment = 1.
+    // 2. Bounds: `x1 = x[0..32]` and `x2 = x[32..64]` are in-bounds.
+    unsafe {
+        let x1 = &*(ptr as *const [u8; 32]); // x[0..32]
+        let x2 = &*(ptr.add(32) as *const [u8; 32]); // x[32..64]
+        (x1, x2)
+    }
+}
+
+/// Split a 64-byte array into disjoint mutable references to `x[0..32]` and
+/// `x[32..64]`.
+///
+/// Not a `const fn`: taking `&mut` in a `const fn` requires Rust 1.83, which is
+/// newer than the MSRV implied by the TODO above.
+#[allow(unsafe_code)]
+pub(crate) fn u8x64_split_u8x32x2_mut(x: &mut [u8; 64]) -> (&mut [u8; 32], &mut [u8; 32]) {
+    let ptr: *mut u8 = x.as_mut_ptr();
+    // SAFETY: we're splitting `x` into two non-overlapping 32B mut refs.
+    // 1. Alignment: `x`, `x1`, and `x2` all have alignment = 1.
+    // 2. Bounds: `x1 = x[0..32]` and `x2 = x[32..64]` are in-bounds.
+    // 3. Aliasing: `x1` and `x2` are disjoint/non-overlapping, so it's safe to
+    //    return multiple mutable references.
+    unsafe {
+        let x1 = &mut *(ptr as *mut [u8; 32]); // x[0..32]
+        let x2 = &mut *(ptr.add(32) as *mut [u8; 32]); // x[32..64]
+        (x1, x2)
+    }
+}
+
+/// Split an 8-word array into disjoint mutable references to `x[0..4]` and
+/// `x[4..8]`.
+///
+/// Not a `const fn`: taking `&mut` in a `const fn` requires Rust 1.83, which is
+/// newer than the MSRV implied by the TODO above.
+#[allow(unsafe_code)]
+pub(crate) fn u64x8_split_u64x4x2_mut(x: &mut [u64; 8]) -> (&mut [u64; 4], &mut [u64; 4]) {
+    let ptr: *mut u64 = x.as_mut_ptr();
+    // SAFETY: we're splitting `x` into two non-overlapping mut refs.
+    // 1. Alignment: `x`, `x1`, and `x2` all share the alignment of `u64`.
+    // 2. Bounds: `x1 = x[0..4]` and `x2 = x[4..8]` are in-bounds.
+    // 3. Aliasing: `x1` and `x2` are disjoint/non-overlapping, so it's safe to
+    //    return multiple mutable references.
+    unsafe {
+        let x1 = &mut *(ptr as *mut [u64; 4]); // x[0..4]
+        let x2 = &mut *(ptr.add(4) as *mut [u64; 4]); // x[4..8]
+        (x1, x2)
+    }
+}