-- [review] git format-patch: adds execution-layer state-size tracking (4 new files,
--   185 insertions). This collapsed line holds the mail header + diffstat, the whole
--   down migration, and the head of the up migration creating
--   `int_execution_state_size_local` (updated_date_time / block_number use
--   DoubleDelta+ZSTD codecs; state roots are 0x-prefixed hex, hence FixedString(66)).
-- NOTE(review): up.sql creates both tables ON CLUSTER '{cluster}', but down.sql
--   drops them WITHOUT ON CLUSTER -- on a multi-replica cluster the rollback only
--   cleans the node it runs on. Confirm the migration runner expands this;
--   otherwise add ON CLUSTER '{cluster}' to both DROP statements.
-- NOTE(review): down.sql has no trailing newline ("\ No newline at end of file").
From f5924e073dd286cad96831d8558f170ea4df190f Mon Sep 17 00:00:00 2001 From: weiihann Date: Fri, 9 Jan 2026 13:18:29 +0800 Subject: [PATCH] feat: add int_execution_state_size --- .../054_int_execution_state_size.down.sql | 2 + .../054_int_execution_state_size.up.sql | 33 +++++ .../external/execution_state_size_delta.sql | 31 +++++ .../int_execution_state_size.sql | 119 ++++++++++++++++++ 4 files changed, 185 insertions(+) create mode 100644 migrations/054_int_execution_state_size.down.sql create mode 100644 migrations/054_int_execution_state_size.up.sql create mode 100644 models/external/execution_state_size_delta.sql create mode 100644 models/transformations/int_execution_state_size.sql diff --git a/migrations/054_int_execution_state_size.down.sql b/migrations/054_int_execution_state_size.down.sql new file mode 100644 index 00000000..a60b1ff1 --- /dev/null +++ b/migrations/054_int_execution_state_size.down.sql @@ -0,0 +1,2 @@ +DROP TABLE IF EXISTS `${NETWORK_NAME}`.int_execution_state_size_local; +DROP TABLE IF EXISTS `${NETWORK_NAME}`.int_execution_state_size; \ No newline at end of file diff --git a/migrations/054_int_execution_state_size.up.sql b/migrations/054_int_execution_state_size.up.sql new file mode 100644 index 00000000..b86c3e37 --- /dev/null +++ b/migrations/054_int_execution_state_size.up.sql @@ -0,0 +1,33 @@ +CREATE TABLE `${NETWORK_NAME}`.int_execution_state_size_local ON CLUSTER '{cluster}' ( + `updated_date_time` DateTime COMMENT 'Timestamp when the record was last updated' Codec(DoubleDelta, ZSTD(1)), + `block_number` UInt64 COMMENT 'Block number at which the state size was measured' Codec(DoubleDelta, ZSTD(1)), + `state_root` FixedString(66) COMMENT 'State root hash of the execution layer at this block' Codec(ZSTD(1)), + `parent_state_root` FixedString(66) COMMENT 'State root hash of the execution layer at the parent block' Codec(ZSTD(1)), + `accounts` UInt64 COMMENT 'Total number of accounts in the state' Codec(ZSTD(1)), + `account_bytes` 
-- [review] Remainder of the up migration: account/code/storage metric columns,
--   the ReplicatedReplacingMergeTree local table (dedup version column
--   updated_date_time, partitioned into 5M-block ranges, ORDER BY
--   (block_number, state_root) so reorg siblings at one height are distinct
--   rows), and the Distributed wrapper sharded by cityHash64(block_number,
--   state_root). Also the head of the external model
--   execution_state_size_delta: block-interval source with 1m incremental /
--   24h full cache scans and a 384-block lag; `min` is pinned to the cached
--   previous_min on incremental scans to keep the bounds stable.
-- NOTE(review): the column list ends with a trailing comma immediately before
--   ") ENGINE" -- recent ClickHouse versions accept trailing commas in DDL,
--   but confirm the minimum version deployed does, or drop the comma.
UInt64 COMMENT 'Total bytes used by account data' Codec(ZSTD(1)), + `account_trienodes` UInt64 COMMENT 'Number of trie nodes in the account trie' Codec(ZSTD(1)), + `account_trienode_bytes` UInt64 COMMENT 'Total bytes used by account trie nodes' Codec(ZSTD(1)), + `contract_codes` UInt64 COMMENT 'Total number of contract codes stored' Codec(ZSTD(1)), + `contract_code_bytes` UInt64 COMMENT 'Total bytes used by contract code' Codec(ZSTD(1)), + `storages` UInt64 COMMENT 'Total number of storage slots in the state' Codec(ZSTD(1)), + `storage_bytes` UInt64 COMMENT 'Total bytes used by storage data' Codec(ZSTD(1)), + `storage_trienodes` UInt64 COMMENT 'Number of trie nodes in the storage trie' Codec(ZSTD(1)), + `storage_trienode_bytes` UInt64 COMMENT 'Total bytes used by storage trie nodes' Codec(ZSTD(1)), +) ENGINE = ReplicatedReplacingMergeTree( + '/clickhouse/{installation}/{cluster}/tables/{shard}/{database}/{table}', + '{replica}', + `updated_date_time` +) +PARTITION BY intDiv(block_number, 5000000) +ORDER BY (block_number, state_root) +COMMENT 'Contains execution layer state size metrics including account, contract code, and storage data measurements at specific block heights.'; + +CREATE TABLE `${NETWORK_NAME}`.int_execution_state_size ON CLUSTER '{cluster}' AS `${NETWORK_NAME}`.int_execution_state_size_local +ENGINE = Distributed( + '{cluster}', + `${NETWORK_NAME}`, + int_execution_state_size_local, + cityHash64( + block_number, + state_root) +); diff --git a/models/external/execution_state_size_delta.sql b/models/external/execution_state_size_delta.sql new file mode 100644 index 00000000..96d5b19d --- /dev/null +++ b/models/external/execution_state_size_delta.sql @@ -0,0 +1,31 @@ +--- +table: execution_state_size_delta +cache: + incremental_scan_interval: 1m + full_scan_interval: 24h +interval: + type: block +lag: 384 +--- +SELECT + {{ if .cache.is_incremental_scan }} + '{{ .cache.previous_min }}' as min, + {{ else }} + min(block_number) as min, + {{ end }} + 
-- [review] Tail of the external delta model: filters on meta_network_name =
--   NETWORK; the scan lower bound $bn is cache.previous_max when incrementally
--   scanning (falling back to EXTERNAL_MODEL_MIN_BLOCK, default 0), and
--   incremental scans are capped at $bn + EXTERNAL_MODEL_SCAN_SIZE_BLOCK
--   (default 10000). Then the head of the int_execution_state_size
--   transformation: incremental, 10k-block chunks, tail-direction fill with
--   gap skipping disabled, forwardfill every 5m, depending on the external
--   delta model above.
-- NOTE(review): max_prev_block applies `HAVING block_number < bounds.start`
--   to the already-aggregated max(block_number). If the target table already
--   contains rows >= bounds.start (re-run of an interior chunk), this returns
--   NO row rather than "the max below bounds.start" as the comment intends;
--   a WHERE clause before the aggregate would both match the intent and let
--   the engine prune -- confirm chunks are only ever processed tail-forward.
-- prev_state deliberately returns every row at that height (multiple
--   state_roots after a reorg); the matching lineage is selected later via
--   first_parent_state_root.
max(block_number) as max +FROM {{ .self.helpers.from }} +WHERE + meta_network_name = '{{ .env.NETWORK }}' + + -- previous_max if incremental scan and is set, otherwise default/env + {{- $bn := default "0" .env.EXTERNAL_MODEL_MIN_BLOCK -}} + {{- if .cache.is_incremental_scan -}} + {{- if .cache.previous_max -}} + {{- $bn = .cache.previous_max -}} + {{- end -}} + {{- end }} + AND block_number >= {{ $bn }} + {{- if .cache.is_incremental_scan }} + AND block_number <= {{ $bn }} + {{ default "10000" .env.EXTERNAL_MODEL_SCAN_SIZE_BLOCK }} + {{- end }} diff --git a/models/transformations/int_execution_state_size.sql b/models/transformations/int_execution_state_size.sql new file mode 100644 index 00000000..45d94fc6 --- /dev/null +++ b/models/transformations/int_execution_state_size.sql @@ -0,0 +1,119 @@ +--- +table: int_execution_state_size +type: incremental +interval: + type: block + max: 10000 +fill: + direction: "tail" + allow_gap_skipping: false +schedules: + forwardfill: "@every 5m" +tags: + - execution + - storage +dependencies: + - "{{external}}.execution_state_size_delta" +--- +INSERT INTO + `{{ .self.database }}`.`{{ .self.table }}` +WITH +-- Get the maximum block number before this chunk +max_prev_block AS ( + SELECT max(block_number) AS block_number + FROM `{{ .self.database }}`.`{{ .self.table }}` FINAL + HAVING block_number < {{ .bounds.start }} +), +-- Get the last known cumulative state before this chunk +-- Returns all records with the max block number (may have multiple state_roots due to reorgs) +prev_state AS ( + SELECT + block_number, + state_root, + accounts, + account_bytes, + account_trienodes, + account_trienode_bytes, + contract_codes, + contract_code_bytes, + storages, + storage_bytes, + storage_trienodes, + storage_trienode_bytes + FROM `{{ .self.database }}`.`{{ .self.table }}` FINAL + WHERE block_number = (SELECT block_number FROM max_prev_block) +), +-- Get all delta records for this block range +all_deltas AS ( + SELECT + block_number, + 
-- [review] Canonical-chain reconstruction: all_deltas pulls raw per-block
--   deltas for [bounds.start, bounds.end]; valid_state_roots is prev_state's
--   roots UNION ALL the empty-trie genesis root (0x56e8...b421) UNION ALL
--   every state_root seen in the range; canonical_deltas keeps deltas whose
--   parent_state_root is in that set; first_parent_state_root anchors the
--   COALESCE lookups to the prev_state row this chunk actually descends from.
-- NOTE(review): valid_state_roots contains *all* in-range roots, including
--   orphaned forks -- a fork block whose parent is itself a fork block also
--   survives the canonical filter. If two rows then exist at one height, the
--   PARTITION-less running SUMs below will double-count. Confirm the upstream
--   source emits at most one competing delta per fork point, or tighten the
--   filter to a true parent-chain walk.
-- NOTE(review): first_parent_state_root uses LIMIT 1 with no ORDER BY --
--   nondeterministic if several deltas share block_number = bounds.start.
-- NOTE(review): the anchor assumes a delta exists exactly at bounds.start;
--   if not, the scalar subquery is empty. COALESCE covers a NULL result, but
--   confirm this ClickHouse version returns NULL (not an error) for an empty
--   scalar subquery here.
state_root, + parent_state_root, + account_delta, + account_bytes_delta, + account_trienode_delta, + account_trienode_bytes_delta, + contract_code_delta, + contract_code_bytes_delta, + storage_delta, + storage_bytes_delta, + storage_trienode_delta, + storage_trienode_bytes_delta + FROM {{ index .dep "{{external}}" "execution_state_size_delta" "helpers" "from" }} FINAL + WHERE block_number BETWEEN {{ .bounds.start }} AND {{ .bounds.end }} + AND meta_network_name = '{{ .env.NETWORK }}' +), +-- Build valid state roots: start with prev_state roots (or genesis), then collect all reachable state_roots +valid_state_roots AS ( + -- Previous state roots (or genesis empty state root if no prev_state) + SELECT state_root FROM prev_state + UNION ALL + SELECT '0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421' AS state_root + UNION ALL + -- All state_roots from deltas in this range (these are valid parents for subsequent blocks) + SELECT state_root FROM all_deltas +), +-- Filter to canonical chain: each block's parent_state_root must exist in valid_state_roots +canonical_deltas AS ( + SELECT d.* + FROM all_deltas d + WHERE d.parent_state_root IN (SELECT state_root FROM valid_state_roots) +), +-- Get the first canonical delta's parent_state_root to find the matching prev_state +first_parent_state_root AS ( + SELECT parent_state_root + FROM canonical_deltas + WHERE block_number = {{ .bounds.start }} + LIMIT 1 +) +SELECT + fromUnixTimestamp({{ .task.start }}) AS updated_date_time, + d.block_number, + d.state_root, + d.parent_state_root, + -- Account metrics: previous state + running sum of deltas + toUInt64(COALESCE((SELECT accounts FROM prev_state WHERE state_root = (SELECT parent_state_root FROM first_parent_state_root)), 0) + + SUM(d.account_delta) OVER (ORDER BY d.block_number ROWS UNBOUNDED PRECEDING)) AS accounts, + toUInt64(COALESCE((SELECT account_bytes FROM prev_state WHERE state_root = (SELECT parent_state_root FROM first_parent_state_root)), 0) + + 
-- [review] Continuation of the cumulative metric columns. Each output column
--   follows one pattern: COALESCE(prev_state value for the lineage chosen by
--   first_parent_state_root, 0) plus a running SUM of the per-block delta over
--   canonical_deltas, ordered by block_number. The explicit
--   ROWS UNBOUNDED PRECEDING frame is deliberate and correct -- the default
--   RANGE frame would merge peer rows on equal block_number values.
-- The repeated scalar subqueries are constant per query (uncorrelated with the
--   outer row), so the duplication is a readability cost, not a per-row one.
SUM(d.account_bytes_delta) OVER (ORDER BY d.block_number ROWS UNBOUNDED PRECEDING)) AS account_bytes, + toUInt64(COALESCE((SELECT account_trienodes FROM prev_state WHERE state_root = (SELECT parent_state_root FROM first_parent_state_root)), 0) + + SUM(d.account_trienode_delta) OVER (ORDER BY d.block_number ROWS UNBOUNDED PRECEDING)) AS account_trienodes, + toUInt64(COALESCE((SELECT account_trienode_bytes FROM prev_state WHERE state_root = (SELECT parent_state_root FROM first_parent_state_root)), 0) + + SUM(d.account_trienode_bytes_delta) OVER (ORDER BY d.block_number ROWS UNBOUNDED PRECEDING)) AS account_trienode_bytes, + -- Contract code metrics + toUInt64(COALESCE((SELECT contract_codes FROM prev_state WHERE state_root = (SELECT parent_state_root FROM first_parent_state_root)), 0) + + SUM(d.contract_code_delta) OVER (ORDER BY d.block_number ROWS UNBOUNDED PRECEDING)) AS contract_codes, + toUInt64(COALESCE((SELECT contract_code_bytes FROM prev_state WHERE state_root = (SELECT parent_state_root FROM first_parent_state_root)), 0) + + SUM(d.contract_code_bytes_delta) OVER (ORDER BY d.block_number ROWS UNBOUNDED PRECEDING)) AS contract_code_bytes, + -- Storage metrics + toUInt64(COALESCE((SELECT storages FROM prev_state WHERE state_root = (SELECT parent_state_root FROM first_parent_state_root)), 0) + + SUM(d.storage_delta) OVER (ORDER BY d.block_number ROWS UNBOUNDED PRECEDING)) AS storages, + toUInt64(COALESCE((SELECT storage_bytes FROM prev_state WHERE state_root = (SELECT parent_state_root FROM first_parent_state_root)), 0) + + SUM(d.storage_bytes_delta) OVER (ORDER BY d.block_number ROWS UNBOUNDED PRECEDING)) AS storage_bytes, + toUInt64(COALESCE((SELECT storage_trienodes FROM prev_state WHERE state_root = (SELECT parent_state_root FROM first_parent_state_root)), 0) + + SUM(d.storage_trienode_delta) OVER (ORDER BY d.block_number ROWS UNBOUNDED PRECEDING)) AS storage_trienodes, + toUInt64(COALESCE((SELECT storage_trienode_bytes FROM prev_state WHERE state_root = 
-- [review] Tail of the INSERT SELECT: closes the storage_trienode_bytes
--   running sum, reads from canonical_deltas ordered by block_number, and
--   caps the query at max_threads = 4.
-- NOTE(review): ORDER BY here only orders the inserted batch; correctness on
--   re-runs relies on ReplacingMergeTree(updated_date_time) deduplicating on
--   the (block_number, state_root) key -- so readers of this table must use
--   FINAL, as this model's own self-reads do.
(SELECT parent_state_root FROM first_parent_state_root)), 0) + + SUM(d.storage_trienode_bytes_delta) OVER (ORDER BY d.block_number ROWS UNBOUNDED PRECEDING)) AS storage_trienode_bytes +FROM canonical_deltas d +ORDER BY d.block_number +SETTINGS max_threads = 4;