diff --git a/.github/workflows/archive_service_healthcheck.yml b/.github/workflows/archive_service_healthcheck.yml new file mode 100644 index 00000000..aca14a35 --- /dev/null +++ b/.github/workflows/archive_service_healthcheck.yml @@ -0,0 +1,230 @@ +name: Archive Service Healthcheck +permissions: + contents: read + pull-requests: write + +on: + schedule: + # Every 8 hours (UTC) + - cron: '0 */8 * * *' + workflow_dispatch: + push: + branches: + - archive_alert + +jobs: + check_blob_sidecars_API_for_critical_blob: + name: Check blob_sidecars API for critical blob + runs-on: ubuntu-latest + timeout-minutes: 5 + env: + ARCHIVE_BLOB_SIDECARS_URL: https://archive.mainnet.ethstorage.io:9645/eth/v1/beacon/blob_sidecars/13164810?indices=3 + ARCHIVE_BLOB_HASH: "5475d05275aaae328b99a4f4058ac1e121eaa4e4d4d378d292d6130f32d6ede0" + + outputs: + failure_detail: ${{ steps.check_endpoint.outputs.failure_detail }} + archive_blob_sidecars_url: ${{ steps.check_endpoint.outputs.archive_blob_sidecars_url }} + daily_success_send: ${{ steps.daily_success.outputs.send }} + + steps: + - name: Check blob_sidecars endpoint + id: check_endpoint + shell: bash + run: | + set -euo pipefail + + url='${{ env.ARCHIVE_BLOB_SIDECARS_URL }}' + expected_blob_hash='${{ env.ARCHIVE_BLOB_HASH }}' + + echo "archive_blob_sidecars_url=$url" >> "$GITHUB_OUTPUT" + + resp_file="${RUNNER_TEMP}/blob_sidecars.json" + + # Ensure file exists so it can be attached in the failure email. + : > "$resp_file" + + # Capture status code; use retries/timeouts to reduce flakiness. 
+ code="$( + curl --silent --show-error --location \ + --retry 3 --retry-delay 5 --retry-all-errors \ + --connect-timeout 10 --max-time 30 \ + --output "$resp_file" --write-out '%{http_code}' \ + "$url" \ + || echo '000' + )" + + echo "HTTP status: $code" + if [[ "$code" != "200" ]]; then + failure_detail="Unexpected HTTP status code: expected 200, got ${code}" + echo "failure_detail=$failure_detail" >> "$GITHUB_OUTPUT" + exit 1 + fi + + blob="$(jq -r '.data[0].blob // empty' "$resp_file")" + if [[ -z "$blob" ]]; then + echo "failure_detail=Missing .data[0].blob in response" >> "$GITHUB_OUTPUT" + exit 1 + fi + + # Compare by SHA-256 hash. + actual_blob_hash="$(printf '%s' "$blob" | sha256sum | awk '{print $1}')" + if [[ -z "$actual_blob_hash" ]]; then + echo "failure_detail=Failed to compute blob sha256" >> "$GITHUB_OUTPUT" + exit 1 + fi + + if [[ "$actual_blob_hash" != "$expected_blob_hash" ]]; then + echo "failure_detail=Blob hash mismatch (expected ${expected_blob_hash}, got ${actual_blob_hash})" >> "$GITHUB_OUTPUT" + exit 1 + fi + + echo "Blob data OK" + + - name: Decide whether to send daily success email + id: daily_success + shell: bash + run: | + set -euo pipefail + + send=false + + # Manual runs: always email on success. + if [[ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ]]; then + send=true + elif [[ "${GITHUB_EVENT_NAME}" == "schedule" ]]; then + # Scheduled runs: every 8 hours (00/08/16 UTC). Send success email only once per day. 
+ hour_utc="$(date -u +%H)" + if [[ "$hour_utc" == "00" ]]; then + send=true + fi + fi + + echo "Event: ${GITHUB_EVENT_NAME}" + echo "send=$send" >> "$GITHUB_OUTPUT" + + check_latest_uploaded_blob: + name: Check latest blob with blobs API from Beacon + runs-on: ubuntu-latest + timeout-minutes: 10 + + outputs: + failure_detail: ${{ steps.putblob.outputs.failure_detail }} + archive_url: ${{ steps.putblob.outputs.archive_url }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install foundry (cast) + uses: foundry-rs/foundry-toolchain@v1 + with: + version: stable + + - name: Find latest PutBlob and fetch beacon blobs + id: putblob + shell: bash + env: + EL_RPC_URL: ${{ secrets.ARCHIVE_SERVICE_EL_RPC_URL }} + BEACON_API: ${{ secrets.ARCHIVE_SERVICE_BEACON_API }} + run: | + set -euo pipefail + out_file="$(mktemp)" + set +e + bash ./integration_tests/scripts/check_latest_blob.sh 2>&1 | tee "$out_file" + status=${PIPESTATUS[0]} + set -e + if [[ "$status" == "0" ]]; then + exit 0 + fi + + echo "failure_detail<<EOF" >> "$GITHUB_OUTPUT" + tail -n 80 "$out_file" >> "$GITHUB_OUTPUT" + echo "EOF" >> "$GITHUB_OUTPUT" + exit 1 + + notify: + name: Notify (combined) + runs-on: ubuntu-latest + timeout-minutes: 5 + needs: + - check_blob_sidecars_API_for_critical_blob + - check_latest_uploaded_blob + if: ${{ always() }} + + steps: + - name: Compose email + id: compose + shell: bash + env: + EVENT_NAME: ${{ github.event_name }} + BLOB_RESULT: ${{ needs.check_blob_sidecars_API_for_critical_blob.result }} + PUTBLOB_RESULT: ${{ needs.check_latest_uploaded_blob.result }} + BLOB_FAILURE_DETAIL: ${{ needs.check_blob_sidecars_API_for_critical_blob.outputs.failure_detail }} + PUTBLOB_FAILURE_DETAIL: ${{ needs.check_latest_uploaded_blob.outputs.failure_detail }} + PUTBLOB_ARCHIVE_URL: ${{ needs.check_latest_uploaded_blob.outputs.archive_url }} + ARCHIVE_BLOB_SIDECARS_URL: ${{ needs.check_blob_sidecars_API_for_critical_blob.outputs.archive_blob_sidecars_url }} + RUN_URL: ${{ 
github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + set -euo pipefail + + send=false + overall="OK" + + if [[ "$BLOB_RESULT" != "success" || "$PUTBLOB_RESULT" != "success" ]]; then + overall="FAILED" + send=true + else + # Success: send only on workflow_dispatch, or once per day at 00 UTC for scheduled runs. + if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then + send=true + elif [[ "$EVENT_NAME" == "schedule" ]]; then + hour_utc="$(date -u +%H)" + if [[ "$hour_utc" == "00" ]]; then + send=true + fi + fi + fi + + if [[ "$overall" == "OK" ]]; then + subject="✅ Archive Service Healthcheck OK" + else + subject="❌ Archive Service Healthcheck FAILED" + fi + + echo "send=$send" >> "$GITHUB_OUTPUT" + echo "subject=$subject" >> "$GITHUB_OUTPUT" + + echo "body<<EOF" >> "$GITHUB_OUTPUT" + echo "Archive Service Healthcheck: $overall" >> "$GITHUB_OUTPUT" + echo >> "$GITHUB_OUTPUT" + echo "Event: $EVENT_NAME" >> "$GITHUB_OUTPUT" + echo "Run: $RUN_URL" >> "$GITHUB_OUTPUT" + echo >> "$GITHUB_OUTPUT" + echo "check_blob_sidecars_API_for_critical_blob: $BLOB_RESULT" >> "$GITHUB_OUTPUT" + echo " URL: $ARCHIVE_BLOB_SIDECARS_URL" >> "$GITHUB_OUTPUT" + if [[ -n "${BLOB_FAILURE_DETAIL:-}" ]]; then + echo " Failure detail: $BLOB_FAILURE_DETAIL" >> "$GITHUB_OUTPUT" + fi + echo >> "$GITHUB_OUTPUT" + echo "check_latest_uploaded_blob: $PUTBLOB_RESULT" >> "$GITHUB_OUTPUT" + if [[ -n "${PUTBLOB_ARCHIVE_URL:-}" ]]; then + echo " Archive URL: $PUTBLOB_ARCHIVE_URL" >> "$GITHUB_OUTPUT" + fi + if [[ -n "${PUTBLOB_FAILURE_DETAIL:-}" ]]; then + echo " Failure detail (tail):" >> "$GITHUB_OUTPUT" + echo "$PUTBLOB_FAILURE_DETAIL" >> "$GITHUB_OUTPUT" + fi + echo "EOF" >> "$GITHUB_OUTPUT" + + - name: Send combined email + if: ${{ steps.compose.outputs.send == 'true' }} + uses: dawidd6/action-send-mail@v6 + with: + server_address: smtp.gmail.com + server_port: 465 + username: ${{ secrets.ARCHIVE_SERVICE_SMTP_USERNAME }} + password: ${{ secrets.ARCHIVE_SERVICE_SMTP_PASSWORD }} 
+ from: ES Archive Service + to: ${{ secrets.ARCHIVE_SERVICE_EMAIL_TO }} + subject: ${{ steps.compose.outputs.subject }} + body: ${{ steps.compose.outputs.body }} diff --git a/integration_tests/scripts/check_latest_blob.sh b/integration_tests/scripts/check_latest_blob.sh new file mode 100755 index 00000000..73b44534 --- /dev/null +++ b/integration_tests/scripts/check_latest_blob.sh @@ -0,0 +1,191 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Hardcoded mainnet constants +genesis_time=1606824023 +seconds_per_slot=12 + +# Search back this many blocks for PutBlob events +lookback=2400 + +ES_CONTRACT_ADDRESS="${ES_CONTRACT_ADDRESS:-0xf0193d6E8fc186e77b6E63af4151db07524f6a7A}" +ARCHIVE_RPC_URL="${ARCHIVE_RPC_URL:-https://archive.mainnet.ethstorage.io:9645}" + +EL_RPC_URL="${EL_RPC_URL:-}" +BEACON_API="${BEACON_API:-}" + +if [[ -z "${EL_RPC_URL:-}" ]]; then + echo "Missing EL_RPC_URL" >&2 + exit 2 +fi + +if [[ -z "${BEACON_API:-}" ]]; then + echo "Missing BEACON_API" >&2 + exit 2 +fi + +for bin in cast jq curl; do + command -v "$bin" >/dev/null 2>&1 || { echo "Missing dependency: $bin" >&2; exit 2; } +done + +topic0="$(cast sig-event "PutBlob(uint256,uint256,bytes32)")" + +latest_dec="$(cast block-number -r "$EL_RPC_URL")" +if [[ -z "$latest_dec" ]]; then +echo "Failed to get latest block" >&2 +exit 1 +fi + +latest_dec=$((latest_dec - 64)) # adjust to finalized +from_dec=$((latest_dec - lookback)) +from_hex="$(cast to-hex "$from_dec")" +to_hex="$(cast to-hex "$latest_dec")" + +echo "Searching PutBlob logs" +echo " contract: $ES_CONTRACT_ADDRESS" +echo " blocks: $from_dec..$latest_dec" + +params="$(jq -nc --arg from "$from_hex" --arg to "$to_hex" --arg addr "$ES_CONTRACT_ADDRESS" --arg t0 "$topic0" '[{fromBlock:$from,toBlock:$to,address:$addr,topics:[$t0]}]')" +logs_json="$(cast rpc --raw -r "$EL_RPC_URL" eth_getLogs "$params")" + +# Handle both possible shapes: +# - raw JSON-RPC: {"jsonrpc":"2.0","id":...,"result":[...]} +# - cast-decoded result: [...] 
+if echo "$logs_json" | jq -e 'type=="object" and has("error")' >/dev/null 2>&1; then + echo "EL RPC returned error:" >&2 + echo "$logs_json" | jq -c '.error' >&2 + exit 1 +fi + +logs_arr="$(echo "$logs_json" | jq -c 'if type=="object" and has("result") then .result elif type=="array" then . else [] end')" + +# eth_getLogs result order is ascending by blockNumber/logIndex; take the last entry. +last_log="$(echo "$logs_arr" | jq -c 'last')" +if [[ "$last_log" == "null" || -z "$last_log" ]]; then + echo "No PutBlob logs found in lookback window" >&2 + exit 1 +fi + +# PutBlob(uint256 indexed kvIdx, uint256 indexed kvSize, bytes32 indexed dataHash) +dataHash="$(echo "$last_log" | jq -r '.topics[3] // empty')" + +if [[ -z "$dataHash" ]]; then + echo "Missing expected PutBlob topic (need topics[3])" >&2 + echo "$last_log" | jq -c . >&2 + exit 1 +fi + +echo "PutBlob params" +echo " dataHash: $dataHash" + +block_hex="$(echo "$last_log" | jq -r '.blockNumber')" +tx_hash="$(echo "$last_log" | jq -r '.transactionHash')" + +# Get block timestamp +block_json="$(cast rpc --raw -r "$EL_RPC_URL" eth_getBlockByNumber "[\"$block_hex\", false]")" +if echo "$block_json" | jq -e 'type=="object" and has("error")' >/dev/null 2>&1; then + echo "EL RPC returned error (eth_getBlockByNumber):" >&2 + echo "$block_json" | jq -c '.error' >&2 + exit 1 +fi + +block_obj="$(echo "$block_json" | jq -c 'if type=="object" and has("result") then .result elif type=="object" then . else empty end')" +if [[ -z "$block_obj" || "$block_obj" == "null" ]]; then + echo "eth_getBlockByNumber returned empty block for $block_hex" >&2 + exit 1 +fi + +ts_hex="$(echo "$block_obj" | jq -r '.timestamp // empty')" +if [[ -z "$ts_hex" || "$ts_hex" == "null" ]]; then + echo "Missing block timestamp in eth_getBlockByNumber response" >&2 + echo "$block_obj" | jq -c . >&2 + exit 1 +fi + +if [[ "$ts_hex" == 0x* || "$ts_hex" == 0X* ]]; then + ts_dec=$((16#${ts_hex#0x})) +else + # Some providers may return decimal timestamps. 
+ ts_dec=$((ts_hex)) +fi + +if (( ts_dec < genesis_time )); then + echo "Block timestamp is before genesis_time" >&2 + exit 1 +fi + +slot=$(((ts_dec - genesis_time) / seconds_per_slot)) + +echo "Latest PutBlob" +echo " tx: $tx_hash" +echo " block: $(cast to-dec "$block_hex")" +echo " slot: $slot" + +# Compare `.data` between beacon blobs API and archive service blobs API. +beacon_url="$BEACON_API/eth/v1/beacon/blobs/$slot?versioned_hashes=${dataHash}" +archive_url="$ARCHIVE_RPC_URL/eth/v1/beacon/blobs/$slot?versioned_hashes=${dataHash}" + +if [[ -n "${GITHUB_OUTPUT:-}" ]]; then + echo "beacon_url=$beacon_url" >> "$GITHUB_OUTPUT" + echo "archive_url=$archive_url" >> "$GITHUB_OUTPUT" +fi + +echo "Fetching blobs (filtered by versioned_hashes)" +echo " beacon: $beacon_url" +echo " archiveService: $archive_url" + +beacon_resp="$( + curl --silent --show-error --location \ + --retry 3 --retry-delay 2 --retry-all-errors \ + --connect-timeout 10 --max-time 60 \ + --write-out '\n%{http_code}' \ + "$beacon_url" \ + || printf '\n000' +)" +beacon_code="${beacon_resp##*$'\n'}" +beacon_body="${beacon_resp%$'\n'*}" +echo "Beacon /blobs status: $beacon_code" +if [[ "$beacon_code" != "200" ]]; then + echo "Beacon blobs request failed (HTTP $beacon_code)" >&2 + echo "URL: $beacon_url" >&2 + exit 1 +fi + +archive_resp="$( + curl --silent --show-error --location \ + --retry 3 --retry-delay 2 --retry-all-errors \ + --connect-timeout 10 --max-time 60 \ + --write-out '\n%{http_code}' \ + "$archive_url" \ + || printf '\n000' +)" +archive_code="${archive_resp##*$'\n'}" +archive_body="${archive_resp%$'\n'*}" +echo "ArchiveService /blobs status: $archive_code" +if [[ "$archive_code" != "200" ]]; then + echo "Archive service blobs request failed (HTTP $archive_code)" >&2 + echo "URL: $archive_url" >&2 + exit 1 +fi + +# Basic sanity: both responses have `.data` as array +if [[ "$(jq -r '.data | type' <<<"$beacon_body")" != "array" ]]; then + echo "Unexpected beacon /blobs response shape" >&2 + 
exit 1 +fi +if [[ "$(jq -r '.data | type' <<<"$archive_body")" != "array" ]]; then + echo "Unexpected archive service /blobs response shape" >&2 + exit 1 +fi + +# Minimal comparison: hash the full `.data` array JSON and compare. +beacon_data_hash="$(jq -c '.data' <<<"$beacon_body" | sha256sum)" +archive_data_hash="$(jq -c '.data' <<<"$archive_body" | sha256sum)" + +if [[ "$beacon_data_hash" == "$archive_data_hash" ]]; then + echo "✅ Match" + exit 0 +fi + +echo "❌ Mismatch" >&2 +exit 1 \ No newline at end of file