Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "skyscraper-evm/lib/forge-std"]
path = skyscraper-evm/lib/forge-std
url = https://github.com/foundry-rs/forge-std
14 changes: 14 additions & 0 deletions skyscraper-evm/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Compiler files
cache/
out/

# Ignores development broadcast logs
!/broadcast
/broadcast/*/31337/
/broadcast/**/dry-run/

# Docs
docs/

# Dotenv file
.env
21 changes: 21 additions & 0 deletions skyscraper-evm/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
## Skyscraper in EVM

This is an optimized EVM implementation of SkyscraperV2 [0].

It comes in two flavors: `compress` and `compress_sigma`. The former has $σ = 1$ and the latter sets $σ$ to the value typical for Montgomery multiplication (which gives better native performance).

The gas costs are approximately 1665 and 1906 gas respectively. Compare with other 64 byte to 32 byte hash functions:

* SkyscraperV2: 1557 gas.
* SkyscraperV2 native friendly: 1798 gas.
* PosseidonV2: 14,934 gas [1]
* Posseidon: 21,124 gas [2]
* Keccak256: 266 gas
* Sha256: 495 gas
* Ripemd160: 1263 gas

Analysis of the EVM assembly code shows that there is at most around 200 gas that can be optimized away with manual stack management. Despite not using inline assembly and manual inlining, the current implementation is already very close to the theoretical minimum gas cost when compiled with optimizations.

[0]: https://eprint.iacr.org/2025/058
[1]: https://github.com/zemse/poseidon2-evm
[2]: https://github.com/chancehudson/poseidon-solidity
8 changes: 8 additions & 0 deletions skyscraper-evm/foundry.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"lib/forge-std": {
"tag": {
"name": "v1.10.0",
"rev": "8bbcf6e3f8f62f419e5429a0bd89331c85c37824"
}
}
}
14 changes: 14 additions & 0 deletions skyscraper-evm/foundry.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[profile.default]
src = "src"
out = "out"
libs = ["lib"]

# See more config options https://github.com/foundry-rs/foundry/blob/master/crates/config/README.md#all-options

# Enable optimizer
optimizer = true

# How aggressively to optimize (higher = more runtime-opt, bigger bytecode)
optimizer_runs = 10000000

via_ir = true
1 change: 1 addition & 0 deletions skyscraper-evm/lib/forge-std
Submodule forge-std added at 8bbcf6
219 changes: 219 additions & 0 deletions skyscraper-evm/src/Skyscraper.sol
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
// SPDX-License-Identifier: UNLICENSED
pragma solidity ^0.8.13;

import {console} from "forge-std/console.sol";

contract Skyscraper {
// BN254 field modulus
uint256 internal constant P =
21888242871839275222246405745257275088548364400416034343698204186575808495617;

uint256 internal constant SIGMA_INV =
9915499612839321149637521777990102151350674507940716049588462388200839649614;

// Non-zero round constants
uint256 internal constant RC_1 =
17829420340877239108687448009732280677191990375576158938221412342251481978692;
uint256 internal constant RC_2 =
5852100059362614845584985098022261541909346143980691326489891671321030921585;
uint256 internal constant RC_3 =
17048088173265532689680903955395019356591870902241717143279822196003888806966;
uint256 internal constant RC_4 =
71577923540621522166602308362662170286605786204339342029375621502658138039;
uint256 internal constant RC_5 =
1630526119629192105940988602003704216811347521589219909349181656165466494167;
uint256 internal constant RC_6 =
7807402158218786806372091124904574238561123446618083586948014838053032654983;
uint256 internal constant RC_7 =
13329560971460034925899588938593812685746818331549554971040309989641523590611;
uint256 internal constant RC_8 =
16971509144034029782226530622087626979814683266929655790026304723118124142299;
uint256 internal constant RC_9 =
8608910393531852188108777530736778805001620473682472554749734455948859886057;
uint256 internal constant RC_10 =
10789906636021659141392066577070901692352605261812599600575143961478236801530;
uint256 internal constant RC_11 =
18708129585851494907644197977764586873688181219062643217509404046560774277231;
uint256 internal constant RC_12 =
8383317008589863184762767400375936634388677459538766150640361406080412989586;
uint256 internal constant RC_13 =
10555553646766747611187318546907885054893417621612381305146047194084618122734;
uint256 internal constant RC_14 =
18278062107303135832359716534360847832111250949377506216079581779892498540823;
uint256 internal constant RC_15 =
9307964587880364850754205696017897664821998926660334400055925260019288889718;
uint256 internal constant RC_16 =
13066217995902074168664295654459329310074418852039335279433003242098078040116;

uint256 internal constant MASK_L1 =
0x7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f;
uint256 internal constant MASK_H1 =
0x8080808080808080808080808080808080808080808080808080808080808080;
uint256 internal constant MASK_L2 =
0x3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F3F;
uint256 internal constant MASK_H2 =
0xC0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0C0;
uint256 internal constant MASK_L3 =
0x1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F1F;
uint256 internal constant MASK_H3 =
0xE0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0E0;

function compress(uint256 l, uint256 r) public pure returns (uint256) {
uint256 t = l;
(l, r) = permute(l, r);
return addmod(t, l, P);
}

function compress_sigma(
uint256 l,
uint256 r
) public pure returns (uint256) {
uint256 t = l;
(l, r) = permute_sigma(l, r);
return addmod(t, l, P);
}

// SkyscraperV2 over Bn254 scalar field with no Montgomery factor.
// Requires l and r to be in the range [0, P-1].
function permute(
uint256 l,
uint256 r
) internal pure returns (uint256, uint256) {
(l, r) = ss(l, r, 0, RC_1);
(l, r) = ss(l, r, RC_2, RC_3);
(l, r) = ss_reduce_l(l, r, RC_4, RC_5);
(l, r) = bb(l, r, RC_6, RC_7);
(l, r) = ss_reduce_l(l, r, RC_8, RC_9);
(l, r) = bb(l, r, RC_10, RC_11);
(l, r) = ss(l, r, RC_12, RC_13);
(l, r) = ss(l, r, RC_14, RC_15);
(l, r) = ss(l, r, RC_16, 0);
return (l, r);
}

// SkyscraperV2 over Bn254 scalar field with Montgomery factor.
// Requires l and r to be in the range [0, P-1].
function permute_sigma(
uint256 l,
uint256 r
) internal pure returns (uint256, uint256) {
(l, r) = sss(l, r, 0, RC_1);
(l, r) = sss(l, r, RC_2, RC_3);
(l, r) = sss_reduce_l(l, r, RC_4, RC_5);
(l, r) = bb(l, r, RC_6, RC_7);
(l, r) = sss_reduce_l(l, r, RC_8, RC_9);
(l, r) = bb(l, r, RC_10, RC_11);
(l, r) = sss(l, r, RC_12, RC_13);
(l, r) = sss(l, r, RC_14, RC_15);
(l, r) = sss(l, r, RC_16, 0);
return (l, r);
}

function ss(
uint256 l,
uint256 r,
uint256 rc_a,
uint256 rc_b
) internal pure returns (uint256, uint256) {
unchecked {
r = rc_a + addmod(mulmod(l, l, P), r, P);
l = rc_b + addmod(mulmod(r, r, P), l, P);
}
return (l, r);
}

function ss_reduce_l(
uint256 l,
uint256 r,
uint256 rc_a,
uint256 rc_b
) internal pure returns (uint256, uint256) {
unchecked {
r = rc_a + addmod(mulmod(l, l, P), r, P);
}
l = addmod(rc_b, addmod(mulmod(r, r, P), l, P), P);
return (l, r);
}

function sss(
uint256 l,
uint256 r,
uint256 rc_a,
uint256 rc_b
) internal pure returns (uint256, uint256) {
unchecked {
r = rc_a + addmod(mulmod(mulmod(l, l, P), SIGMA_INV, P), r, P);
l = rc_b + addmod(mulmod(mulmod(r, r, P), SIGMA_INV, P), l, P);
Comment on lines +145 to +146
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these addmod are safe to be replaced by regular adds. After mulmod there is 4.29p space left before a u256 overflows. Afterwards it directly gets fed into the mulmod in sss_reduce_l which in turn reduces it.

}
return (l, r);
}

function sss_reduce_l(
uint256 l,
uint256 r,
uint256 rc_a,
uint256 rc_b
) internal pure returns (uint256, uint256) {
unchecked {
r = rc_a + addmod(mulmod(mulmod(l, l, P), SIGMA_INV, P), r, P);
}
Comment on lines +157 to +159
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on this line it looks like r doesn't have to be reduced for bb so it should be fine to drop the addmod here. Same reasoning as above.

l = addmod(
rc_b,
addmod(mulmod(mulmod(r, r, P), SIGMA_INV, P), l, P),
P
);
Comment on lines +160 to +164
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here, only the outer addmod should be required.

return (l, r);
}

// Requires l to be reduced.
function bb(
uint256 l,
uint256 r,
uint256 rc_a,
uint256 rc_b
) internal pure returns (uint256, uint256) {
uint256 x = (l << 128) | (l >> 128); // Rotate left by 128 bits
uint256 x1 = ((x & MASK_L1) << 1) | ((x & MASK_H1) >> 7); // Bytewise rotate left 1
uint256 x2 = ((x1 & MASK_L1) << 1) | ((x1 & MASK_H1) >> 7);
uint256 x3 = x1 & x2;
uint256 x4 = ((x3 & MASK_L2) << 2) | ((x3 & MASK_H2) >> 6);
x = x1 ^ ((~x2) & x4);
r = addmod(rc_a, addmod(x, r, P), P);

x = (r << 128) | (r >> 128); // Rotate left by 128 bits
x1 = ((x & MASK_L1) << 1) | ((x & MASK_H1) >> 7); // Bytewise rotate left 1
x2 = ((x1 & MASK_L1) << 1) | ((x1 & MASK_H1) >> 7);
x3 = x1 & x2;
x4 = ((x3 & MASK_L2) << 2) | ((x3 & MASK_H2) >> 6);
x = x1 ^ ((~x2) & x4);
unchecked {
l = rc_b + addmod(x, l, P);
}
return (l, r);
}

function bar(uint256 x) internal pure returns (uint256) {
x = (x << 128) | (x >> 128); // Rotate left by 128 bits
uint256 x1 = ((x & MASK_L1) << 1) | ((x & MASK_H1) >> 7); // Bytewise rotate left 1
uint256 x2 = ((x1 & MASK_L1) << 1) | ((x1 & MASK_H1) >> 7);
uint256 x3 = x1 & x2;
uint256 x4 = ((x3 & MASK_L2) << 2) | ((x3 & MASK_H2) >> 6); // Bytewise rotate left 2
return x1 ^ ((~x2) & x4);
}

// SWAR 32-byte parallel SBOX.
function sbox(uint256 x) internal pure returns (uint256) {
uint256 x1 = ((x & MASK_L1) << 1) | ((x & MASK_H1) >> 7);
uint256 x2 = ((x1 & MASK_L1) << 1) | ((x1 & MASK_H1) >> 7);
uint256 t = x & x1;
t = ((t & MASK_L3) << 3) | ((t & MASK_H3) >> 5);
return x1 ^ ((~x2) & t);
}

// Bitwise rotate a byte left one place, rotates 32 bytes in parallel using SWAR.
function rot1(uint256 x) internal pure returns (uint256) {
uint256 left = (x & MASK_L1) << 1;
uint256 right = (x & MASK_H1) >> 7;
return left | right;
}
}
Loading
Loading