diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..c195df1d --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "skyscraper-evm/lib/forge-std"] + path = skyscraper-evm/lib/forge-std + url = https://github.com/foundry-rs/forge-std diff --git a/skyscraper-evm/.gitignore b/skyscraper-evm/.gitignore new file mode 100644 index 00000000..85198aaa --- /dev/null +++ b/skyscraper-evm/.gitignore @@ -0,0 +1,14 @@ +# Compiler files +cache/ +out/ + +# Ignores development broadcast logs +!/broadcast +/broadcast/*/31337/ +/broadcast/**/dry-run/ + +# Docs +docs/ + +# Dotenv file +.env diff --git a/skyscraper-evm/README.md b/skyscraper-evm/README.md new file mode 100644 index 00000000..ddefdcca --- /dev/null +++ b/skyscraper-evm/README.md @@ -0,0 +1,21 @@ +## Skyscraper in EVM + +This is an optimized EVM implementation of SkyscraperV2 [0]. + +It comes in two flavors: `compress` and `compress_sigma`. The former has $σ = 1$ and the latter sets $σ$ to the value typical for Montgomery multiplication (which gives better native performance). + +The gas costs are approximately 1665 and 1906 gas respectively. Compare with other 64-byte to 32-byte hash functions: + +* SkyscraperV2: 1557 gas. +* SkyscraperV2 native friendly: 1798 gas. +* Poseidon2: 14,934 gas [1] +* Poseidon: 21,124 gas [2] +* Keccak256: 266 gas +* Sha256: 495 gas +* Ripemd160: 1263 gas + +Analysis of the EVM assembly code shows that there is at most around 200 gas that can be optimized away with manual stack management. Despite not using inline assembly and manual inlining, the current implementation is already very close to the theoretical minimum gas cost when compiled with optimizations. 
+
+[0]: https://eprint.iacr.org/2025/058
+[1]: https://github.com/zemse/poseidon2-evm
+[2]: https://github.com/chancehudson/poseidon-solidity
diff --git a/skyscraper-evm/foundry.lock b/skyscraper-evm/foundry.lock
new file mode 100644
index 00000000..56436427
--- /dev/null
+++ b/skyscraper-evm/foundry.lock
@@ -0,0 +1,8 @@
+{
+  "lib/forge-std": {
+    "tag": {
+      "name": "v1.10.0",
+      "rev": "8bbcf6e3f8f62f419e5429a0bd89331c85c37824"
+    }
+  }
+}
\ No newline at end of file
diff --git a/skyscraper-evm/foundry.toml b/skyscraper-evm/foundry.toml
new file mode 100644
index 00000000..75b2c61b
--- /dev/null
+++ b/skyscraper-evm/foundry.toml
@@ -0,0 +1,14 @@
+[profile.default]
+src = "src"
+out = "out"
+libs = ["lib"]
+
+# See more config options https://github.com/foundry-rs/foundry/blob/master/crates/config/README.md#all-options
+
+# Enable optimizer
+optimizer = true
+
+# How aggressively to optimize (higher = more runtime-opt, bigger bytecode)
+optimizer_runs = 10000000
+
+via_ir = true
diff --git a/skyscraper-evm/lib/forge-std b/skyscraper-evm/lib/forge-std
new file mode 160000
index 00000000..8bbcf6e3
--- /dev/null
+++ b/skyscraper-evm/lib/forge-std
@@ -0,0 +1 @@
+Subproject commit 8bbcf6e3f8f62f419e5429a0bd89331c85c37824
diff --git a/skyscraper-evm/src/Skyscraper.sol b/skyscraper-evm/src/Skyscraper.sol
new file mode 100644
index 00000000..a21b7eb7
--- /dev/null
+++ b/skyscraper-evm/src/Skyscraper.sol
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: UNLICENSED
+pragma solidity ^0.8.13;
+
+// NOTE(review): `console` is not referenced anywhere in this contract.
+import {console} from "forge-std/console.sol";
+
+// Skyscraper permutation and 2-to-1 compression function over the BN254
+// scalar field, in two variants: the plain one (`permute` / `compress`) and
+// the sigma one (`permute_sigma` / `compress_sigma`), whose round functions
+// additionally multiply every square by SIGMA_INV. All field arithmetic is
+// done with the addmod/mulmod built-ins modulo P.
+contract Skyscraper {
+    // BN254 field modulus
+    uint256 internal constant P =
+        21888242871839275222246405745257275088548364400416034343698204186575808495617;
+
+    // Factor every square is multiplied by (mod P) in the sigma round
+    // functions `sss` and `sss_reduce_l`.
+    // NOTE(review): presumably the inverse mod P of the Montgomery factor
+    // sigma mentioned in the README -- confirm against the reference.
+    uint256 internal constant SIGMA_INV =
+        9915499612839321149637521777990102151350674507940716049588462388200839649614;
+
+    // Non-zero round constants
+    // `permute` and `permute_sigma` consume them in order as (rc_a, rc_b)
+    // pairs. The constant preceding RC_1 and the one following RC_16 are zero
+    // and are passed as the literal 0 instead of being declared here.
+    uint256 internal constant RC_1 =
+        17829420340877239108687448009732280677191990375576158938221412342251481978692;
+    uint256 internal constant RC_2 =
+        5852100059362614845584985098022261541909346143980691326489891671321030921585;
+    uint256 internal constant RC_3 =
+        17048088173265532689680903955395019356591870902241717143279822196003888806966;
+    uint256 internal constant RC_4 =
+        71577923540621522166602308362662170286605786204339342029375621502658138039;
+    uint256 internal constant RC_5 =
+        1630526119629192105940988602003704216811347521589219909349181656165466494167;
+    uint256 internal constant RC_6 =
+        7807402158218786806372091124904574238561123446618083586948014838053032654983;
+    uint256 internal constant RC_7 =
+        13329560971460034925899588938593812685746818331549554971040309989641523590611;
+    uint256 internal constant RC_8 =
+        16971509144034029782226530622087626979814683266929655790026304723118124142299;
+    uint256 internal constant RC_9 =
+        8608910393531852188108777530736778805001620473682472554749734455948859886057;
+    uint256 internal constant RC_10 =
+        10789906636021659141392066577070901692352605261812599600575143961478236801530;
+    uint256 internal constant RC_11 =
+        18708129585851494907644197977764586873688181219062643217509404046560774277231;
+    uint256 internal constant RC_12 =
+        8383317008589863184762767400375936634388677459538766150640361406080412989586;
+    uint256 internal constant RC_13 =
+        10555553646766747611187318546907885054893417621612381305146047194084618122734;
+    uint256 internal constant RC_14 =
+        18278062107303135832359716534360847832111250949377506216079581779892498540823;
+    uint256 internal constant RC_15 =
+        9307964587880364850754205696017897664821998926660334400055925260019288889718;
+    uint256 internal constant RC_16 =
+        13066217995902074168664295654459329310074418852039335279433003242098078040116;
+
+    // SWAR masks, one identical pattern per byte of the 256-bit word.
+    // MASK_Ln keeps the low (8 - n) bits of every byte and MASK_Hn keeps the
+    // high n bits; together they implement a per-byte left rotation by n as
+    // ((x & MASK_Ln) << n) | ((x & MASK_Hn) >> (8 - n)).
+    uint256 internal constant MASK_L1 =
+        0x7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f;
+    uint256 internal constant MASK_H1 =
+        0x8080808080808080808080808080808080808080808080808080808080808080;
+    uint256 internal constant MASK_L2 =
+        0x3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F;
+    uint256 internal constant MASK_H2 =
+        0xC0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0;
+    uint256 internal constant MASK_L3 =
+        0x1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F;
+    uint256 internal constant MASK_H3 =
+        0xE0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0;
+
+    // 2-to-1 compression: runs `permute` on (l, r) and returns
+    // (l + permuted l) mod P, which is always below P. The permuted r is
+    // discarded.
+    // NOTE: the inputs are not range-checked. Every use of l and r goes
+    // through addmod/mulmod, so inputs that differ by a multiple of P give
+    // the same output; callers that need distinct inputs to hash differently
+    // must pass values in [0, P-1].
+    function compress(uint256 l, uint256 r) public pure returns (uint256) {
+        uint256 t = l; // keep the original l for the feed-forward addition
+        (l, r) = permute(l, r);
+        return addmod(t, l, P);
+    }
+
+    // Same construction as `compress`, but built on `permute_sigma`.
+    // The same note about unchecked input ranges applies.
+    function compress_sigma(
+        uint256 l,
+        uint256 r
+    ) public pure returns (uint256) {
+        uint256 t = l; // keep the original l for the feed-forward addition
+        (l, r) = permute_sigma(l, r);
+        return addmod(t, l, P);
+    }
+
+    // SkyscraperV2 over Bn254 scalar field with no Montgomery factor.
+    // Requires l and r to be in the range [0, P-1].
+    // Applies nine round calls in the order ss, ss, ss, bb, ss, bb, ss, ss, ss.
+    // The two `ss_reduce_l` calls sit directly before the `bb` calls because
+    // `bb` requires a reduced l; the other `ss` calls may leave l and r
+    // unreduced. In the last call rc_b is 0, so the returned l is below P,
+    // while the returned r may still be >= P.
+    function permute(
+        uint256 l,
+        uint256 r
+    ) internal pure returns (uint256, uint256) {
+        (l, r) = ss(l, r, 0, RC_1);
+        (l, r) = ss(l, r, RC_2, RC_3);
+        (l, r) = ss_reduce_l(l, r, RC_4, RC_5);
+        (l, r) = bb(l, r, RC_6, RC_7);
+        (l, r) = ss_reduce_l(l, r, RC_8, RC_9);
+        (l, r) = bb(l, r, RC_10, RC_11);
+        (l, r) = ss(l, r, RC_12, RC_13);
+        (l, r) = ss(l, r, RC_14, RC_15);
+        (l, r) = ss(l, r, RC_16, 0);
+        return (l, r);
+    }
+
+    // SkyscraperV2 over Bn254 scalar field with Montgomery factor.
+    // Requires l and r to be in the range [0, P-1].
+    // Applies nine round calls in the order sss, sss, sss, bb, sss, bb, sss,
+    // sss, sss, using the same round constants and the same `bb` as `permute`.
+    // The two `sss_reduce_l` calls sit directly before the `bb` calls because
+    // `bb` requires a reduced l. In the last call rc_b is 0, so the returned l
+    // is below P, while the returned r may still be >= P.
+    function permute_sigma(
+        uint256 l,
+        uint256 r
+    ) internal pure returns (uint256, uint256) {
+        (l, r) = sss(l, r, 0, RC_1);
+        (l, r) = sss(l, r, RC_2, RC_3);
+        (l, r) = sss_reduce_l(l, r, RC_4, RC_5);
+        (l, r) = bb(l, r, RC_6, RC_7);
+        (l, r) = sss_reduce_l(l, r, RC_8, RC_9);
+        (l, r) = bb(l, r, RC_10, RC_11);
+        (l, r) = sss(l, r, RC_12, RC_13);
+        (l, r) = sss(l, r, RC_14, RC_15);
+        (l, r) = sss(l, r, RC_16, 0);
+        return (l, r);
+    }
+
+    // Two squaring steps without the sigma factor:
+    //   r <- rc_a + ((l^2 + r) mod P)
+    //   l <- rc_b + ((r^2 + l) mod P)      (uses the r just computed)
+    // The round-constant additions are unchecked and not reduced, so both
+    // outputs may be >= P. With rc_a, rc_b < P each sum stays below 2P, which
+    // is less than 2^256, so the unchecked additions cannot wrap. Unreduced
+    // inputs are fine because they are only consumed by mulmod/addmod.
+    function ss(
+        uint256 l,
+        uint256 r,
+        uint256 rc_a,
+        uint256 rc_b
+    ) internal pure returns (uint256, uint256) {
+        unchecked {
+            r = rc_a + addmod(mulmod(l, l, P), r, P);
+            l = rc_b + addmod(mulmod(r, r, P), l, P);
+        }
+        return (l, r);
+    }
+
+    // Same as `ss`, except that rc_b is added with addmod, so the returned l
+    // is below P. The returned r may still be >= P.
+    function ss_reduce_l(
+        uint256 l,
+        uint256 r,
+        uint256 rc_a,
+        uint256 rc_b
+    ) internal pure returns (uint256, uint256) {
+        unchecked {
+            r = rc_a + addmod(mulmod(l, l, P), r, P);
+        }
+        l = addmod(rc_b, addmod(mulmod(r, r, P), l, P), P);
+        return (l, r);
+    }
+
+    // Sigma variant of `ss`: each square is multiplied by SIGMA_INV (mod P)
+    // before being added.
+    //   r <- rc_a + ((l^2 * SIGMA_INV + r) mod P)
+    //   l <- rc_b + ((r^2 * SIGMA_INV + l) mod P)
+    // As in `ss`, the outputs are not reduced after the constant is added.
+    function sss(
+        uint256 l,
+        uint256 r,
+        uint256 rc_a,
+        uint256 rc_b
+    ) internal pure returns (uint256, uint256) {
+        unchecked {
+            r = rc_a + addmod(mulmod(mulmod(l, l, P), SIGMA_INV, P), r, P);
+            l = rc_b + addmod(mulmod(mulmod(r, r, P), SIGMA_INV, P), l, P);
+        }
+        return (l, r);
+    }
+
+    // Same as `sss`, except that rc_b is added with addmod, so the returned l
+    // is below P. The returned r may still be >= P.
+    function sss_reduce_l(
+        uint256 l,
+        uint256 r,
+        uint256 rc_a,
+        uint256 rc_b
+    ) internal pure returns (uint256, uint256) {
+        unchecked {
+            r = rc_a + addmod(mulmod(mulmod(l, l, P), SIGMA_INV, P), r, P);
+        }
+        l = addmod(
+            rc_b,
+            addmod(mulmod(mulmod(r, r, P), SIGMA_INV, P), l, P),
+            P
+        );
+        return (l, r);
+    }
+
+    // Requires l to be reduced.
+    // Two bar steps, with the body of `bar` written out inline:
+    //   r <- (rc_a + bar(l) + r) mod P       (fully reduced)
+    //   l <- rc_b + ((bar(r) + l) mod P)     (uses the r just computed)
+    // The last addition is unchecked and not reduced, so the returned l may be
+    // >= P; with rc_b < P the sum stays below 2P and cannot wrap.
+    // NOTE(review): l presumably has to be reduced because the bit-level bar
+    // step acts on the integer representation rather than on the residue
+    // class -- confirm against the reference.
+    function bb(
+        uint256 l,
+        uint256 r,
+        uint256 rc_a,
+        uint256 rc_b
+    ) internal pure returns (uint256, uint256) {
+        uint256 x = (l << 128) | (l >> 128); // Rotate left by 128 bits
+        uint256 x1 = ((x & MASK_L1) << 1) | ((x & MASK_H1) >> 7); // Bytewise rotate left 1
+        uint256 x2 = ((x1 & MASK_L1) << 1) | ((x1 & MASK_H1) >> 7); // Rotate again: x rotated left 2 per byte
+        uint256 x3 = x1 & x2;
+        uint256 x4 = ((x3 & MASK_L2) << 2) | ((x3 & MASK_H2) >> 6); // Bytewise rotate left 2
+        x = x1 ^ ((~x2) & x4);
+        r = addmod(rc_a, addmod(x, r, P), P);
+
+        // Same sequence again, now applied to the freshly reduced r.
+        x = (r << 128) | (r >> 128); // Rotate left by 128 bits
+        x1 = ((x & MASK_L1) << 1) | ((x & MASK_H1) >> 7); // Bytewise rotate left 1
+        x2 = ((x1 & MASK_L1) << 1) | ((x1 & MASK_H1) >> 7);
+        x3 = x1 & x2;
+        x4 = ((x3 & MASK_L2) << 2) | ((x3 & MASK_H2) >> 6);
+        x = x1 ^ ((~x2) & x4);
+        unchecked {
+            l = rc_b + addmod(x, l, P);
+        }
+        return (l, r);
+    }
+
+    // Stand-alone bar step: swaps the two 128-bit halves of x, then maps every
+    // byte independently to x1 ^ (~x2 & x4), where x1 and x2 are the byte
+    // rotated left by 1 and by 2, and x4 is (x1 & x2) rotated left by 2.
+    // The result is a raw 256-bit word and is not reduced modulo P.
+    function bar(uint256 x) internal pure returns (uint256) {
+        x = (x << 128) | (x >> 128); // Rotate left by 128 bits
+        uint256 x1 = ((x & MASK_L1) << 1) | ((x & MASK_H1) >> 7); // Bytewise rotate left 1
+        uint256 x2 = ((x1 & MASK_L1) << 1) | ((x1 & MASK_H1) >> 7);
+        uint256 x3 = x1 & x2;
+        uint256 x4 = ((x3 & MASK_L2) << 2) | ((x3 & MASK_H2) >> 6); // Bytewise rotate left 2
+        return x1 ^ ((~x2) & x4);
+    }
+
+    // SWAR 32-byte parallel SBOX.
+    // Maps every byte independently to x1 ^ (~x2 & t), where x1 and x2 are the
+    // byte rotated left by 1 and by 2, and t is (x & x1) rotated left by 3.
+    // No 128-bit half swap is performed here. Because per-byte rotation
+    // commutes with AND, t equals the x4 term computed in `bar`, so this is
+    // the same per-byte map written with one 3-bit rotation.
+    function sbox(uint256 x) internal pure returns (uint256) {
+        uint256 x1 = ((x & MASK_L1) << 1) | ((x & MASK_H1) >> 7);
+        uint256 x2 = ((x1 & MASK_L1) << 1) | ((x1 & MASK_H1) >> 7);
+        uint256 t = x & x1;
+        t = ((t & MASK_L3) << 3) | ((t & MASK_H3) >> 5);
+        return x1 ^ ((~x2) & t);
+    }
+
+    // Bitwise rotate a byte left one place, rotates 32 bytes in parallel using SWAR.
+    // `left` shifts the low seven bits of every byte up by one position;
+    // `right` moves every byte's top bit down to bit 0 of the same byte.
+    function rot1(uint256 x) internal pure returns (uint256) {
+        uint256 left = (x & MASK_L1) << 1;
+        uint256 right = (x & MASK_H1) >> 7;
+        return left | right;
+    }
+}
diff --git a/skyscraper-evm/test/Skyscraper.sol b/skyscraper-evm/test/Skyscraper.sol
new file mode 100644
index 00000000..5e1c1161
--- /dev/null
+++ b/skyscraper-evm/test/Skyscraper.sol
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: UNLICENSED
+pragma solidity ^0.8.13;
+
+import {Test} from "forge-std/Test.sol";
+import {Skyscraper} from "../src/Skyscraper.sol";
+// NOTE(review): `console` is not referenced in this file; the benchmarks
+// report through `emit log_named_uint`.
+import {console} from "forge-std/console.sol";
+
+// Unit tests and gas benchmarks for Skyscraper. The test contract inherits
+// from Skyscraper so that the internal round functions and constants can be
+// called directly.
+// NOTE(review): the known-answer tests below cover `rot1`, `sbox`, `bar`,
+// `sss`, `bb` and `permute_sigma`; the outputs of the non-sigma `permute` /
+// `compress` and of `compress_sigma` are only exercised by the benchmarks.
+contract SkyscraperTest is Test, Skyscraper {
+    // Each of the three low bytes is doubled; no bit wraps around here.
+    function test_rot_1() public {
+        uint256 result = rot1(0x010203);
+        assertEq(result, 0x020406);
+    }
+
+    // Known-answer test for the per-byte S-box on six bytes.
+    function test_sbox() public {
+        uint256 result = sbox(0xcd1783142b1e);
+        assertEq(result, 0xd30e172846bc);
+    }
+
+    // Known-answer test for `bar`. The raw result is not reduced, so it is
+    // compared modulo P.
+    function test_bar() public {
+        uint256 result = bar(
+            13251711941470795978907268022756015766767985221093713388330058285942871890923
+        );
+        assertEq(
+            result % P,
+            8538086118276539577536391439548092640553835458646834916786764569256164366265
+        );
+    }
+
+    // Known-answer test for one round call with constants RC_2 / RC_3.
+    // NOTE(review): despite the name, this calls `sss` (the sigma variant),
+    // not `ss`.
+    function test_ss_2() public {
+        uint256 l = 11818428481613126259506041491792444971306025298632020312923851211664140080269;
+        uint256 r = 16089984100220651117533376273482359701319211672522891227502963383930673183481;
+        (uint256 l_out, uint256 r_out) = sss(l, r, RC_2, RC_3);
+        // Outputs may be unreduced, so compare modulo P.
+        assertEq(
+            l_out % P,
+            2897520731550929941842826131888578795995028656093850302425034320680216166225
+        );
+        assertEq(
+            r_out % P,
+            10274752619072178425540318899508997829349102488123199431506343228471746115261
+        );
+    }
+
+    // Known-answer test for `bb` with constants RC_6 / RC_7.
+    function test_bb_6() public {
+        uint256 l = 13251711941470795978907268022756015766767985221093713388330058285942871890923;
+        uint256 r = 1017722258958995329580328739423576514309327442471989504101393158056883989572;
+        (uint256 l_out, uint256 r_out) = bb(l, r, RC_6, RC_7);
+        // The returned l may be unreduced, so compare modulo P.
+        assertEq(
+            l_out % P,
+            3193610555912363022088172260048956988022957239290210718020144819371540058981
+        );
+        assertEq(
+            r_out % P,
+            17363210535454321713488811303876243393424286347736908007836172565366081010820
+        );
+    }
+
+    // Known-answer test for the full sigma permutation on the all-zero state.
+    function test_zero() public {
+        (uint256 l, uint256 r) = permute_sigma(0, 0);
+        assertEq(
+            l % P,
+            5793276905781313965269111743763131906666794041798623267477617572701829069290
+        );
+        assertEq(
+            r % P,
+            12296274483727574983376829575121280934973829438414198530604912453551798647077
+        );
+    }
+
+    // Benchmarks. Each one records gasleft() before initialising its locals,
+    // runs 1000 iterations that feed the result back into the next call, and
+    // logs the average gas per iteration. The figure therefore includes the
+    // loop overhead and the local initialisation, not just the call itself.
+
+    // Average gas of one `ss` call.
+    function test_bench_ss() public {
+        uint256 startGas = gasleft();
+        uint256 l = 0;
+        uint256 r = 0;
+        for (uint256 i = 0; i < 1000; i++) {
+            (l, r) = ss(l, r, RC_2, RC_3);
+        }
+        uint256 gasUsed = startGas - gasleft();
+        emit log_named_uint("gas per call", gasUsed / 1000);
+    }
+
+    // Average gas of one `sss` call.
+    function test_bench_ss_sigma() public {
+        uint256 startGas = gasleft();
+        uint256 l = 0;
+        uint256 r = 0;
+        for (uint256 i = 0; i < 1000; i++) {
+            (l, r) = sss(l, r, RC_2, RC_3);
+        }
+        uint256 gasUsed = startGas - gasleft();
+        emit log_named_uint("gas per call", gasUsed / 1000);
+    }
+
+    // Average gas of one `bb` call.
+    function test_bench_bb() public {
+        uint256 startGas = gasleft();
+        uint256 l = 0;
+        uint256 r = 0;
+        for (uint256 i = 0; i < 1000; i++) {
+            (l, r) = bb(l, r, RC_6, RC_7);
+        }
+        uint256 gasUsed = startGas - gasleft();
+        emit log_named_uint("gas per call", gasUsed / 1000);
+    }
+
+    // Average gas of one `compress` call; r stays fixed at RC_8.
+    function test_bench_compress() public {
+        uint256 startGas = gasleft();
+        uint256 l = RC_5;
+        uint256 r = RC_8;
+        for (uint256 i = 0; i < 1000; i++) {
+            l = compress(l, r);
+        }
+        uint256 gasUsed = startGas - gasleft();
+        emit log_named_uint("gas per call", gasUsed / 1000);
+    }
+
+    // Average gas of one `compress_sigma` call; r stays fixed at RC_8.
+    function test_bench_compress_sigma() public {
+        uint256 startGas = gasleft();
+        uint256 l = RC_5;
+        uint256 r = RC_8;
+        for (uint256 i = 0; i < 1000; i++) {
+            l = compress_sigma(l, r);
+        }
+        uint256 gasUsed = startGas - gasleft();
+        emit log_named_uint("gas per call", gasUsed / 1000);
+    }
+
+    // Baseline: keccak256 over the 64-byte packed encoding of (l, r).
+    function test_bench_sha3() public {
+        uint256 startGas = gasleft();
+        uint256 l = RC_5;
+        uint256 r = RC_8;
+        for (uint256 i = 0; i < 1000; i++) {
+            l = uint256(keccak256(abi.encodePacked(l, r)));
+        }
+        uint256 gasUsed = startGas - gasleft();
+        emit log_named_uint("gas per call", gasUsed / 1000);
+    }
+
+    // Baseline: sha256 over the 64-byte packed encoding of (l, r).
+    function test_bench_sha256() public {
+        uint256 startGas = gasleft();
+        uint256 l = RC_5;
+        uint256 r = RC_8;
+        for (uint256 i = 0; i < 1000; i++) {
+            l = uint256(sha256(abi.encodePacked(l, r)));
+        }
+        uint256 gasUsed = startGas - gasleft();
+        emit log_named_uint("gas per call", gasUsed / 1000);
+    }
+
+    // Baseline: ripemd160 over the 64-byte packed encoding of (l, r); the
+    // 20-byte digest is widened to bytes32 before conversion to uint256.
+    function test_bench_ripemd160() public {
+        uint256 startGas = gasleft();
+        uint256 l = RC_5;
+        uint256 r = RC_8;
+        for (uint256 i = 0; i < 1000; i++) {
+            l = uint256(bytes32(ripemd160(abi.encodePacked(l, r))));
+        }
+        uint256 gasUsed = startGas - gasleft();
+        emit log_named_uint("gas per call", gasUsed / 1000);
+    }
+}