diff --git a/.github/workflows/ray-rs.yml b/.github/workflows/ray-rs.yml index b06aaa2..0c89e1a 100644 --- a/.github/workflows/ray-rs.yml +++ b/.github/workflows/ray-rs.yml @@ -12,7 +12,26 @@ on: paths: - "ray-rs/**" - ".github/workflows/ray-rs.yml" + schedule: + - cron: "0 9 * * 1" workflow_dispatch: + inputs: + ann_pq_profile: + description: ANN PQ tracking profile (manual runs only) + type: choice + required: false + default: fast + options: + - fast + - full + replication_soak_profile: + description: Replication soak tracking profile (manual runs only) + type: choice + required: false + default: fast + options: + - fast + - full concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true @@ -22,6 +41,7 @@ defaults: jobs: meta: name: Release Gate + if: github.event_name != 'schedule' runs-on: ubuntu-latest outputs: all: ${{ steps.flags.outputs.all }} @@ -53,6 +73,262 @@ jobs: if echo "$COMMIT_MSG" | grep -E "^(all|js|ts|py|rs|core): [0-9]+\.[0-9]+\.[0-9]+"; then echo "any=true" >> "$GITHUB_OUTPUT" fi + + replication-perf-gate: + name: Replication Perf Gate + if: github.event_name != 'schedule' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + - name: Cache cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + ray-rs/target/ + key: replication-perf-gate-${{ hashFiles('ray-rs/Cargo.lock') }} + - name: Run replication perf gate + run: ./scripts/replication-perf-gate.sh + env: + STAMP: ci-${{ github.run_id }}-${{ github.run_attempt }} + # CI-tuned defaults to reduce runtime while keeping meaningful signal. 
+ ITERATIONS: "10000" + SEED_COMMITS: "1000" + BACKLOG_COMMITS: "3000" + ATTEMPTS: "3" + P95_MAX_RATIO: "1.30" + MIN_CATCHUP_FPS: "2500" + MIN_THROUGHPUT_RATIO: "0.10" + - name: Upload replication benchmark logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: replication-perf-gate-logs + path: docs/benchmarks/results/ci-${{ github.run_id }}-${{ github.run_attempt }}-replication-*gate*.txt + if-no-files-found: ignore + open-close-vector-gate: + name: Open/Close Vector Gate + if: github.event_name != 'schedule' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + - name: Cache cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + ray-rs/target/ + key: open-close-vector-gate-${{ hashFiles('ray-rs/Cargo.lock') }} + - name: Run open/close vector gate + run: ./scripts/open-close-vector-gate.sh + env: + STAMP: ci-${{ github.run_id }}-${{ github.run_attempt }} + ATTEMPTS: "1" + MAX_SMALL_RW_RATIO: "5.0" + MAX_SMALL_RO_RATIO: "5.0" + MAX_LARGE_RW_RATIO: "2.5" + MAX_LARGE_RO_RATIO: "2.5" + - name: Upload open/close vector gate logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: open-close-vector-gate-logs + path: docs/benchmarks/results/ci-${{ github.run_id }}-${{ github.run_attempt }}-open-close-vector-gate*.txt + if-no-files-found: ignore + open-close-non-vector-gate: + name: Open/Close Non-Vector Gate + if: github.event_name != 'schedule' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + - name: Cache cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + ray-rs/target/ + key: open-close-non-vector-gate-${{ hashFiles('ray-rs/Cargo.lock') }} + - name: Run open/close non-vector gate + run: ./scripts/open-close-non-vector-gate.sh + env: + STAMP: ci-${{ 
github.run_id }}-${{ github.run_attempt }} + ATTEMPTS: "1" + MAX_SMALL_RW_US: "900.0" + MAX_SMALL_RO_US: "900.0" + MAX_LARGE_RW_US: "5000.0" + MAX_LARGE_RO_US: "5000.0" + - name: Upload open/close non-vector gate logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: open-close-non-vector-gate-logs + path: docs/benchmarks/results/ci-${{ github.run_id }}-${{ github.run_attempt }}-open-close-non-vector-gate*.txt + if-no-files-found: ignore + ann-quality-gate: + name: ANN Quality Gate + if: github.event_name != 'schedule' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + - name: Cache cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + ray-rs/target/ + key: ann-quality-gate-${{ hashFiles('ray-rs/Cargo.lock') }} + - name: Run ANN quality gate + run: ./scripts/vector-ann-gate.sh + env: + STAMP: ci-${{ github.run_id }}-${{ github.run_attempt }} + # CI defaults tuned from baseline matrix; latency-first IVF-PQ gate. 
+ ATTEMPTS: "3" + ALGORITHM: "ivf_pq" + RESIDUALS: "false" + PQ_SUBSPACES: "48" + PQ_CENTROIDS: "256" + N_PROBE: "16" + MIN_RECALL_AT_K: "0.16" + MAX_P95_MS: "8.0" + - name: Upload ANN gate logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: ann-quality-gate-logs + path: docs/benchmarks/results/ci-${{ github.run_id }}-${{ github.run_attempt }}-vector-ann-gate*.txt + if-no-files-found: ignore + replication-soak-tracking: + name: Replication Soak Tracking + if: github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + - name: Cache cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + ray-rs/target/ + key: replication-soak-tracking-${{ hashFiles('ray-rs/Cargo.lock') }} + - name: Run replication soak tracking (fast) + if: github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && inputs.replication_soak_profile != 'full') + run: ./scripts/replication-soak-gate.sh + env: + STAMP: ci-${{ github.run_id }}-${{ github.run_attempt }} + # Non-blocking churn/promotion/reseed trend run (scheduled + manual). + ATTEMPTS: "1" + REPLICAS: "5" + CYCLES: "12" + COMMITS_PER_CYCLE: "80" + ACTIVE_REPLICAS: "3" + CHURN_INTERVAL: "3" + PROMOTION_INTERVAL: "4" + RESEED_CHECK_INTERVAL: "2" + MAX_FRAMES: "128" + RECOVERY_MAX_LOOPS: "80" + SEGMENT_MAX_BYTES: "1" + RETENTION_MIN: "48" + MAX_ALLOWED_LAG: "2500" + MIN_PROMOTIONS: "2" + MIN_RESEEDS: "1" + - name: Run replication soak tracking (full) + if: github.event_name == 'workflow_dispatch' && inputs.replication_soak_profile == 'full' + run: ./scripts/replication-soak-gate.sh + env: + STAMP: ci-${{ github.run_id }}-${{ github.run_attempt }} + # Manual deep soak profile for longer churn/promotion trend checks. 
+ ATTEMPTS: "1" + REPLICAS: "5" + CYCLES: "24" + COMMITS_PER_CYCLE: "120" + ACTIVE_REPLICAS: "4" + CHURN_INTERVAL: "2" + PROMOTION_INTERVAL: "3" + RESEED_CHECK_INTERVAL: "2" + MAX_FRAMES: "256" + RECOVERY_MAX_LOOPS: "120" + SEGMENT_MAX_BYTES: "1" + RETENTION_MIN: "64" + MAX_ALLOWED_LAG: "3500" + MIN_PROMOTIONS: "4" + MIN_RESEEDS: "1" + - name: Upload replication soak logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: replication-soak-tracking-logs + path: docs/benchmarks/results/ci-${{ github.run_id }}-${{ github.run_attempt }}-replication-soak-gate*.txt + if-no-files-found: ignore + ann-pq-tracking: + name: ANN PQ Tracking + if: github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + - name: Cache cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + ray-rs/target/ + key: ann-pq-tracking-${{ hashFiles('ray-rs/Cargo.lock') }} + - name: Run ANN PQ tuning sweep (fast) + if: github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && inputs.ann_pq_profile != 'full') + run: ./scripts/vector-ann-pq-tuning.sh + env: + STAMP: ci-${{ github.run_id }}-${{ github.run_attempt }} + # Keep this lightweight enough for periodic trend tracking. + VECTORS: "15000" + QUERIES: "150" + N_PROBES: "8 16" + PQ_SUBSPACES_SET: "24 48" + PQ_CENTROIDS_SET: "128 256" + RESIDUALS_SET: "false" + - name: Run ANN PQ tuning sweep (full) + if: github.event_name == 'workflow_dispatch' && inputs.ann_pq_profile == 'full' + run: ./scripts/vector-ann-pq-tuning.sh + env: + STAMP: ci-${{ github.run_id }}-${{ github.run_attempt }} + # Manual deep sweep for analysis (not part of push/main gating). 
+ VECTORS: "20000" + QUERIES: "200" + N_PROBES: "8 16" + PQ_SUBSPACES_SET: "24 48" + PQ_CENTROIDS_SET: "128 256" + RESIDUALS_SET: "false true" + - name: Upload ANN PQ tuning logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: ann-pq-tracking-logs + path: | + docs/benchmarks/results/ci-${{ github.run_id }}-${{ github.run_attempt }}-vector-ann-pq-tuning*.txt + docs/benchmarks/results/ci-${{ github.run_id }}-${{ github.run_attempt }}-vector-ann-pq-tuning*.csv + if-no-files-found: ignore # =========================================== # Node.js Builds (napi-rs) # =========================================== diff --git a/.gitignore b/.gitignore index 2bfc460..8cbb08e 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,6 @@ ray-rs/python/benchmarks/results/*.txt # Bench/trace outputs ray-rs/flamegraph.svg ray-rs/*.trace/ + +# code-intelligence +.code-intelligence/ diff --git a/code-intelligence.json b/code-intelligence.json new file mode 100644 index 0000000..c4b48ee --- /dev/null +++ b/code-intelligence.json @@ -0,0 +1,53 @@ +{ + "cache": { + "enabled": true, + "force": false + }, + "inputs": { + "exclude": [ + "node_modules", + "dist", + "build", + "target", + ".git", + ".venv", + "__pycache__", + ".code-intelligence" + ], + "include": [ + "*.ts", + "*.tsx", + "*.js", + "*.jsx", + "*.py", + "*.cpp", + "*.cc", + "*.c", + "*.hpp", + "*.h", + "*.ada" + ], + "paths": [ + "." + ], + "respectGitignore": true + }, + "output": { + "incremental": true + }, + "phases": { + "graph": true, + "indexPack": false, + "parse": true, + "scip": false, + "vector": true + }, + "projectRoot": ".", + "vector": { + "provider": { + "baseUrl": "https://api.openai.com/v1", + "kind": "http", + "model": "text-embedding-3-small" + } + } +} \ No newline at end of file diff --git a/docs/BENCHMARKS.md b/docs/BENCHMARKS.md index 0c839c3..17c90b9 100644 --- a/docs/BENCHMARKS.md +++ b/docs/BENCHMARKS.md @@ -3,7 +3,7 @@ This document summarizes **measured** benchmark results. 
Raw outputs live in `docs/benchmarks/results/` so we can trace every number back to an actual run. -> Latest numbers below were captured on **February 4, 2026**. Prior results +> Latest numbers below were captured on **February 4-5, 2026**. Prior results > from **February 3, 2026** are retained for comparison. If you need fresh > numbers, rerun the commands in the next section and update this doc with the > new output files. @@ -34,6 +34,19 @@ Optional knobs (Rust): - `--group-commit-enabled` - `--group-commit-window-ms N` (default: 2) +### Rust (replication catch-up throughput) + +```bash +cd ray-rs +cargo run --release --example replication_catchup_bench --no-default-features -- \ + --seed-commits 1000 --backlog-commits 5000 --max-frames 256 --sync-mode normal +``` + +Key outputs: +- `primary_frames_per_sec` +- `catchup_frames_per_sec` +- `throughput_ratio` (`catchup/primary`) + ### Python bindings (single-file raw) ```bash @@ -64,6 +77,134 @@ cargo run --release --example vector_bench --no-default-features -- \ --vectors 10000 --dimensions 768 --iterations 1000 --k 10 --n-probe 10 ``` +### Vector compaction strategy (Rust) + +```bash +cd ray-rs +cargo run --release --example vector_compaction_bench --no-default-features -- \ + --vectors 50000 --dimensions 384 --fragment-target-size 5000 \ + --delete-ratio 0.35 --min-deletion-ratio 0.30 --max-fragments 4 --min-vectors-to-compact 10000 +``` + +Use this to compare compaction threshold tradeoffs before changing default vector/ANN maintenance policy. 
+ +Automated matrix sweep: + +```bash +cd ray-rs +./scripts/vector-compaction-matrix.sh +``` + +Latest matrix snapshot (2026-02-08, 50k vectors, 384 dims, fragment target 5k): +- Result artifacts: + - `docs/benchmarks/results/2026-02-08-vector-compaction-matrix.txt` + - `docs/benchmarks/results/2026-02-08-vector-compaction-matrix.csv` + - `docs/benchmarks/results/2026-02-08-vector-compaction-min-vectors-sweep.txt` + - `docs/benchmarks/results/2026-02-08-vector-compaction-min-vectors-sweep.csv` +- `min_deletion_ratio=0.30`, `max_fragments=4` gives balanced reclaim/latency: + - `delete_ratio=0.35`: `14.32%` reclaim (single-run latency in low-double-digit ms on this host) + - `delete_ratio=0.55`: `22.24%` reclaim (single-run latency in single-digit ms on this host) +- `max_fragments=8` reclaims more (`28.18%` / `44.18%`) but roughly doubles compaction latency. +- `min_deletion_ratio=0.40` can skip moderate-churn compaction (`delete_ratio=0.35`), so stale deleted bytes remain. +- Recommendation: keep defaults `min_deletion_ratio=0.30`, `max_fragments_per_compaction=4`, `min_vectors_to_compact=10000`. 
+ +### ANN algorithm matrix (Rust: IVF vs IVF-PQ) + +Single run: + +```bash +cd ray-rs +cargo run --release --example vector_ann_bench --no-default-features -- \ + --algorithm ivf_pq --vectors 20000 --dimensions 384 --queries 200 --k 10 --n-probe 16 \ + --pq-subspaces 48 --pq-centroids 256 --residuals false +``` + +Matrix sweep: + +```bash +cd ray-rs +./scripts/vector-ann-matrix.sh +``` + +Latest matrix snapshot (2026-02-08, 20k vectors, 384 dims, 200 queries, k=10): +- Result artifacts: + - `docs/benchmarks/results/2026-02-08-vector-ann-matrix.txt` + - `docs/benchmarks/results/2026-02-08-vector-ann-matrix.csv` +- At same `n_probe`, IVF had higher recall than IVF-PQ in this baseline: + - `n_probe=8`: IVF `0.1660`, IVF-PQ `0.1195` (`residuals=false`) + - `n_probe=16`: IVF `0.2905`, IVF-PQ `0.1775` (`residuals=false`) +- IVF-PQ (`residuals=false`) had lower search p95 latency than IVF: + - `n_probe=8`: `0.4508ms` vs IVF `0.7660ms` + - `n_probe=16`: `1.3993ms` vs IVF `4.0272ms` +- IVF-PQ build time was much higher than IVF in this baseline. +- Current recommendation: use latency-first IVF-PQ as default ANN path with + `residuals=false`, `pq_subspaces=48`, `pq_centroids=256`; monitor recall floor via ANN gate. + +PQ tuning sweep: + +```bash +cd ray-rs +./scripts/vector-ann-pq-tuning.sh +``` + +Latest tuning snapshot (2026-02-08): +- Result artifacts: + - `docs/benchmarks/results/2026-02-08-vector-ann-pq-tuning.txt` + - `docs/benchmarks/results/2026-02-08-vector-ann-pq-tuning.csv` +- Best recall-preserving PQ config in this sweep: + - `residuals=false`, `pq_subspaces=48`, `pq_centroids=256` + - `n_probe=8`: recall ratio vs IVF `0.6875`, p95 ratio vs IVF `0.6155` + - `n_probe=16`: recall ratio vs IVF `0.6636`, p95 ratio vs IVF `0.4634` +- Current implication: this configuration is the best IVF-PQ candidate for latency-first profiles, but still below IVF recall in this workload. 
+ +CI tracking: +- Main workflow (`.github/workflows/ray-rs.yml`) includes non-blocking `ann-pq-tracking` + (weekly schedule + manual dispatch) running `./scripts/vector-ann-pq-tuning.sh`. +- Results are uploaded as artifact `ann-pq-tracking-logs`. +- Tracking logs are run-scoped with stamp `ci--`. +- Scheduled runs skip release/publish gating jobs; schedule path is tracking-only. +- Manual dispatch input `ann_pq_profile`: + - `fast` (default): lightweight trend sweep. + - `full`: deeper sweep (`RESIDUALS_SET=false true`) for investigation. + +ANN quality/latency gate: + +```bash +cd ray-rs +./scripts/vector-ann-gate.sh +``` + +Defaults: +- `ALGORITHM=ivf_pq`, `RESIDUALS=false`, `PQ_SUBSPACES=48`, `PQ_CENTROIDS=256` +- `N_PROBE=16`, `ATTEMPTS=3` +- `MIN_RECALL_AT_K=0.16` +- `MAX_P95_MS=8.0` + +Latest gate snapshot (2026-02-08): see `docs/benchmarks/results/2026-02-08-vector-ann-gate.attempt*.txt` (pass). + +CI: +- Main-branch workflow (`.github/workflows/ray-rs.yml`) runs `./scripts/vector-ann-gate.sh` + and uploads logs as artifact `ann-quality-gate-logs`. +- Gate logs are run-scoped with stamp `ci--`. + +### Index pipeline hypothesis (network-dominant) + +```bash +cd ray-rs +cargo run --release --example index_pipeline_hypothesis_bench --no-default-features -- \ + --mode both --changes 200 --working-set 200 --vector-dims 128 \ + --tree-sitter-latency-ms 2 --scip-latency-ms 6 --embed-latency-ms 200 \ + --embed-batch-size 32 --embed-flush-ms 20 --embed-inflight 4 \ + --vector-apply-batch-size 64 --sync-mode normal +``` + +Interpretation: +- If `parallel` hot-path elapsed is much lower than `sequential`, async embed queueing is working. +- If `parallel` hot-path p95 is lower than `sequential`, TS+SCIP parallel parse plus unified graph commit is working. +- If `parallel` freshness p95 is too high, tune `--embed-batch-size`, `--embed-flush-ms`, + and `--embed-inflight` (or reduce overwrite churn with larger working set / dedupe rules). +- Replacement ratio (`Queue ... 
replaced=...`) quantifies stale embed work eliminated by dedupe. + ### SQLite baseline (single-file raw) ```bash @@ -78,6 +219,152 @@ Notes (SQLite): - WAL autocheckpoint disabled; `journal_size_limit` set to match WAL size - Edge props stored in a separate table; edges use `INSERT OR IGNORE` and props use `INSERT OR REPLACE` +### RayDB vs Memgraph (local 1-hop traversal comparison) + +This is a **local-only** comparison harness for your own machine. It builds the +same graph in both engines and benchmarks a query equivalent to: + +`db.from(alice).out(Knows).toArray()` + +Prerequisites: +- Memgraph running locally (default `127.0.0.1:7687`) + +Run with your requested shape (10k nodes, 20k edges, alice fan-out 10) using the +Rust benchmark: + +```bash +cd ray-rs +cargo run --release --example ray_vs_memgraph_bench --no-default-features -- \ + --nodes 10000 --edges 20000 --query-results 10 --iterations 5000 +``` + +Adjust result cardinality to your `5-20` target: +- `--query-results 5` +- `--query-results 20` + +Optional Python harness is still available at: +- `ray-rs/python/benchmarks/benchmark_raydb_vs_memgraph.py` + +### Replication performance gates (Phase D carry-over) + +Run both replication perf gates: + +```bash +cd ray-rs +./scripts/replication-perf-gate.sh +``` + +#### Gate A: primary commit overhead + +Compares write latency with replication disabled vs enabled (`role=primary`) +using the same benchmark harness. 
+ +```bash +cd ray-rs +./scripts/replication-bench-gate.sh +``` + +Defaults: +- Dataset: `NODES=10000`, `EDGES=0`, `EDGE_TYPES=1`, `EDGE_PROPS=0`, `VECTOR_COUNT=0` +- Primary rotation guardrail: `REPLICATION_SEGMENT_MAX_BYTES=1073741824` +- `ITERATIONS=20000` +- `SYNC_MODE=normal` +- `ATTEMPTS=7` (median ratio across attempts is used for pass/fail) +- Pass threshold: `P95_MAX_RATIO=1.30` (replication-on p95 / baseline p95) +- `ITERATIONS` must be `>= 100` + +Example override: + +```bash +cd ray-rs +ITERATIONS=2000 ATTEMPTS=5 P95_MAX_RATIO=1.05 ./scripts/replication-bench-gate.sh +``` + +Outputs: +- `docs/benchmarks/results/YYYY-MM-DD-replication-gate-baseline.txt` (single-attempt mode) +- `docs/benchmarks/results/YYYY-MM-DD-replication-gate-primary.txt` (single-attempt mode) +- `docs/benchmarks/results/YYYY-MM-DD-replication-gate-{baseline,primary}.attemptN.txt` (multi-attempt mode) +- `STAMP` can be overridden for run-scoped output naming (used by CI). + +#### Gate B: replica catch-up throughput + +Ensures replica catch-up throughput stays healthy relative to primary commit +throughput on the same workload. + +```bash +cd ray-rs +./scripts/replication-catchup-gate.sh +``` + +Defaults: +- `SEED_COMMITS=1000` +- `BACKLOG_COMMITS=5000` +- `MAX_FRAMES=256` +- `SYNC_MODE=normal` +- `ATTEMPTS=3` (retry count for noisy host variance) +- Pass threshold: `MIN_CATCHUP_FPS=3000` +- Pass threshold: `MIN_THROUGHPUT_RATIO=0.13` (catch-up fps / primary fps) +- `BACKLOG_COMMITS` must be `>= 100` + +Example override: + +```bash +cd ray-rs +BACKLOG_COMMITS=10000 ATTEMPTS=5 MIN_THROUGHPUT_RATIO=1.10 ./scripts/replication-catchup-gate.sh +``` + +Output: +- `docs/benchmarks/results/YYYY-MM-DD-replication-catchup-gate.txt` (single-attempt mode) +- `docs/benchmarks/results/YYYY-MM-DD-replication-catchup-gate.attemptN.txt` (multi-attempt mode) +- `STAMP` can be overridden for run-scoped output naming (used by CI). + +Notes: +- Gate A = commit-path overhead. 
+- Gate B = replica apply throughput. +- Keep replication correctness suite green alongside perf gates: + - `cargo test --no-default-features --test replication_phase_a --test replication_phase_b --test replication_phase_c --test replication_phase_d --test replication_faults_phase_d` + - `cargo test --no-default-features replication::` + +#### Gate C: replication soak stability (lag churn + promote/reseed) + +Exercises a `1 primary + 5 replicas` soak-style scenario with rotating lag churn, +periodic promotion fence checks, and reseed recovery under retention pressure. + +```bash +cd ray-rs +./scripts/replication-soak-gate.sh +``` + +Defaults: +- `REPLICAS=5` +- `CYCLES=6` +- `COMMITS_PER_CYCLE=40` +- `ACTIVE_REPLICAS=3` +- `CHURN_INTERVAL=2` +- `PROMOTION_INTERVAL=3` +- `RESEED_CHECK_INTERVAL=2` +- `MAX_FRAMES=128` +- `RECOVERY_MAX_LOOPS=80` +- `SEGMENT_MAX_BYTES=1` +- `RETENTION_MIN=64` +- `ATTEMPTS=1` +- Pass threshold: `MAX_ALLOWED_LAG=1200` +- Pass threshold: `MIN_PROMOTIONS=2` +- Pass threshold: `MIN_RESEEDS=1` +- Invariant checks: divergence must be `0`, stale-fence rejections must equal promotions. + +Example override: + +```bash +cd ray-rs +CYCLES=18 COMMITS_PER_CYCLE=120 CHURN_INTERVAL=3 PROMOTION_INTERVAL=6 RESEED_CHECK_INTERVAL=3 MAX_ALLOWED_LAG=3000 ATTEMPTS=2 ./scripts/replication-soak-gate.sh +``` + +Output: +- `docs/benchmarks/results/YYYY-MM-DD-replication-soak-gate.txt` (single-attempt mode) +- `docs/benchmarks/results/YYYY-MM-DD-replication-soak-gate.attemptN.txt` (multi-attempt mode) +- `STAMP` can be overridden for run-scoped output naming (used by CI tracking jobs). + ## Latest Results (2026-02-04) Sync-mode sweep logs (nodes-only + edges-heavy datasets): @@ -336,6 +623,35 @@ Sync=Off, GC off: | 10 | 313.67K/s | | 16 | 296.99K/s | +#### Index pipeline hypothesis notes (2026-02-05) + +Goal: validate whether remote embedding latency dominates enough that we should +decouple graph hot path from vector persistence using async batching + dedupe. 
+ +Harness: +- `ray-rs/examples/index_pipeline_hypothesis_bench.rs` +- Simulated tree-sitter + SCIP parse, graph writes, synthetic embed latency, batched vector apply. +- `sequential`: TS parse -> TS graph commit -> SCIP parse -> SCIP graph commit -> embed -> vector apply. +- `parallel`: TS+SCIP parse overlap -> unified graph commit -> async embed queue -> batched vector apply. + +Sample runs (200 events, working set=200, batch=32, flush=20ms, inflight=4, vector-apply-batch=64): + +| TS/SCIP parse | Embed latency | Mode | Hot path elapsed | Total elapsed | Hot p95 | Freshness p95 | Replaced jobs | +|---------------|---------------|------|------------------|---------------|---------|----------------|---------------| +| 1ms / 1ms | 50ms/batch | Sequential | 11.260s | 11.314s | 2.64ms | 55.09ms | n/a | +| 1ms / 1ms | 50ms/batch | Parallel | 0.255s | 0.329s | 1.30ms | 168.43ms | 6.00% | +| 2ms / 6ms | 200ms/batch | Sequential | 42.477s | 42.679s | 10.22ms | 205.11ms | n/a | +| 2ms / 6ms | 200ms/batch | Parallel | 1.448s | 1.687s | 7.60ms | 775.61ms | 5.50% | + +Takeaway: +- Hot path throughput improves dramatically with async pipeline. +- Vector freshness depends on batching/queue pressure and overwrite churn; tune freshness separately + from hot-path latency target. + +Raw logs: +- `docs/benchmarks/results/2026-02-05-index-pipeline-hypothesis-embed50.txt` +- `docs/benchmarks/results/2026-02-05-index-pipeline-hypothesis-embed200.txt` + ## Prior Results (2026-02-03) Raw logs: diff --git a/docs/REPLICATION_PLAN.md b/docs/REPLICATION_PLAN.md new file mode 100644 index 0000000..e5de2ec --- /dev/null +++ b/docs/REPLICATION_PLAN.md @@ -0,0 +1,468 @@ +# KiteDB Replication V1 Plan (Feature + Code) + +Status: Phase D complete; V1 release cut pending + +## 1) Goals + +- Single-writer primary, multiple read replicas. +- Keep local embedded path default and fastest when replication is disabled. +- Add optional read-your-writes on replicas via commit token wait. 
+- Manual replica promotion to primary (no automatic election in V1). + +## 2) Non-Goals (V1) + +- Multi-primary / multi-writer. +- Automatic leader election / consensus. +- WAN topology optimization and geo-routing. +- Replicating rebuildable derived indexes as required state. + +## 3) Scope + +- Engine: single-file `.kitedb` path only. +- Topology target: `1 primary + up to 5 replicas`. +- Transport target: pull-based replication first (HTTP contract), push later without format break. +- API policy: additive only. + +## 4) Replication Invariants + +1. Exactly one writable primary per epoch. +2. Replica apply order is commit order from primary. +3. Replica apply is idempotent by log index. +4. Commit token monotonicity per epoch. +5. Checkpoint/compaction on primary must not break replica catch-up semantics. +6. If replication is disabled, existing behavior and performance profile remain unchanged. + +## 5) Data Model: Source-of-Truth vs Derived + +### Authoritative replicated state + +- Committed transaction stream (logical mutation records). +- Snapshot checkpoint image + metadata. +- Replication epoch and monotonic log index. + +### Derived/rebuildable state (not required for correctness replication) + +- Caches (`cache::*`). +- In-memory overlays reconstructed from snapshot + replicated tx stream. +- Rebuildable vector/search side structures (unless explicitly marked authoritative in future phases). + +## 6) Consistency Model + +- Default replica reads: eventual/async. +- Optional stronger read: provide commit token and wait until `applied_log_index >= token.log_index`. +- Write ack policy: primary acks after local durability boundary only (replicas async). + +## 7) Durability and Crash Boundaries + +Commit must define explicit durability points: + +1. Primary WAL commit record persisted per current `sync_mode` rules. +2. Replication log frame append persisted for the same commit. +3. Commit token returned only after replication log append is durable. 
+ +Crash model requirements: + +- Crash before token return: client may retry safely (idempotency via tx semantics/log index handling). +- Crash after token return: token must correspond to durable replication log frame. +- Replica restart resumes from persisted cursor with idempotent re-apply. + +## 8) Compatibility and Versioning + +- Keep `.kitedb` format backward compatible in V1. +- Replication metadata lives in versioned sidecar manifest + segments. +- Promotion increments epoch; stale writers must be fenced by epoch checks. + +## 9) Architecture (V1) + +### 9.1 Replication log sidecar + +- New sidecar directory adjacent to DB file. +- Segment files: append-only, checksummed tx frames. +- Manifest: current epoch, head index, retained floor, active segment metadata. +- Cursor: `epoch:segment_id:offset:log_index`. + +### 9.2 Primary responsibilities + +- On commit, append committed tx frame to replication sidecar. +- Expose snapshot + log pull interfaces. +- Track replica progress (last acknowledged cursor/index) for retention decisions. + +### 9.3 Replica responsibilities + +- Bootstrap from latest snapshot bundle. +- Catch up via log pull from snapshot start cursor. +- Persist applied cursor atomically after apply batch. +- Serve reads immediately or wait-for-token when requested. + +## 10) Code Touch Points + +Core engine: + +- `ray-rs/src/core/single_file/transaction.rs` + - Commit hook for replication append + token emission. +- `ray-rs/src/core/single_file/open.rs` + - Role/config wiring (primary/replica settings). +- `ray-rs/src/core/single_file/recovery.rs` + - Shared replay semantics reuse for replica apply path. +- `ray-rs/src/metrics/mod.rs` + - Replication lag/apply metrics. 
+ +New module tree: + +- `ray-rs/src/replication/mod.rs` +- `ray-rs/src/replication/types.rs` +- `ray-rs/src/replication/manifest.rs` +- `ray-rs/src/replication/log_store.rs` +- `ray-rs/src/replication/primary.rs` +- `ray-rs/src/replication/replica.rs` +- `ray-rs/src/replication/token.rs` +- `ray-rs/src/replication/transport.rs` + +Binding surface (additive): + +- `ray-rs/src/napi_bindings/database.rs` +- `ray-rs/src/pyo3_bindings/database.rs` + +## 11) API/Interface Additions (Additive) + +- Open options: + - replication role (`primary` | `replica` | `disabled`) + - replication sidecar path (optional default derived from DB path) + - pull/apply tuning (chunk bytes, poll interval, max batch) +- Primary status: + - replication head index/epoch + - retained floor + - per-replica lag +- Replica status: + - applied index/epoch + - last pull/apply error +- Read wait: + - `wait_for_token(token, timeout_ms)` style helper. + +## 12) Transport Contract (Pull-First) + +- `GET /replication/snapshot/latest` + - Returns snapshot bytes + metadata (checksum, epoch, start cursor/index). +- `GET /replication/log?cursor=...&max_bytes=...` + - Returns ordered tx frames + next cursor + eof marker. +- `GET /replication/status` + - Primary/replica status for observability. +- `POST /replication/promote` + - Manual promotion to next epoch (authenticated). + +Protocol requirement: all payloads versioned to allow push transport later with same frame/cursor model. + +## 13) Retention Policy + +- Segment rotation by size (default 64MB). +- Retain at least: + - minimum time window (operator-configured), and + - min cursor needed by active replicas. +- If replica falls behind retained floor: + - mark `needs_reseed`, + - force snapshot bootstrap. + +## 14) Failure Modes and Handling + +1. Corrupt segment/frame checksum: + - stop apply, surface hard error, require retry/reseed policy. +2. Missing segment due to retention: + - deterministic `needs_reseed` status. +3. 
Network interruption: + - retry with backoff, resume from durable cursor. +4. Promotion race: + - epoch fencing rejects stale primary writes. +5. Primary crash mid-commit: + - recovery ensures token/log durability invariant holds. + +## 15) Performance Constraints + +- Disabled replication path: <3% regression on write/read microbenchmarks. +- Enabled replication: + - bounded p95 commit overhead target (to be locked in benchmark baseline run). + - replica apply throughput >= primary sustained commit rate at target topology. +- Keep commit hot path branch-light when replication disabled. + +## 16) Test-Driven Delivery Model (Red/Green First) + +### Phase workflow (mandatory) + +1. Red: + - Define phase contract/invariants. + - Add failing tests for that phase before implementation. +2. Green: + - Implement only enough to pass the new failing tests. +3. Refactor/Hardening: + - Cleanups, edge-case coverage, failure-path tests, perf checks. +4. Phase gate: + - No phase is complete until all red tests are green and phase exit checks pass. + +### Test layout + +- Module-level tests in `ray-rs/src/replication/*` for parser/state invariants. +- Cross-module integration tests in `ray-rs/tests/replication_*.rs`. +- Fault-injection tests in dedicated `ray-rs/tests/replication_faults_*.rs`. +- Perf checks in existing benchmark harnesses with replication-on/off variants. + +### Global test matrix + +- Unit: + - cursor/token encode/decode. + - frame checksum and parse validation. + - segment rotation and retention math. + - idempotent apply for duplicate/replayed chunks. +- Integration: + - snapshot bootstrap + incremental catch-up. + - replica restart + resume cursor. + - background checkpoint during active replication. + - token wait semantics on replica. + - manual promotion and stale writer fencing. +- Fault injection: + - crash before/after token return boundary. + - truncated frame/chunk. + - corrupt snapshot metadata. + - replica far behind retained floor. 
+- Performance: + - baseline local mode (replication off). + - replication-on write latency/throughput. + - catch-up time for large backlog. + +## 17) Detailed Delivery Phases (Per-Phase Red/Green Gates) + +### Phase A: Invariants + sidecar primitives + +Objective: + +- Freeze wire/storage invariants and build deterministic sidecar primitives. + +Red tests first: + +- Invalid token/cursor strings are rejected. +- Token/cursor ordering comparator is monotonic and epoch-aware. +- Corrupt segment frame checksum fails read/scan. +- Manifest interrupted-write simulation never yields partial-valid state. +- Segment append/read roundtrip preserves frame boundaries and indices. + +Green implementation: + +- Add `replication` module skeleton and core types. +- Implement versioned manifest read/write with atomic replace semantics. +- Implement segment append/read and frame checksum verification. +- Freeze token/cursor format and parser behavior. + +Robustness checks: + +- Fuzz/property-like tests on token/cursor parser. +- Recovery tests for manifest reload after simulated interruption. + +Phase exit criteria: + +- All Phase A red tests green. +- No API breakage. +- Sidecar primitives deterministic across restart. + +### Phase B: Primary commit integration + +Objective: + +- Integrate replication append/token generation into primary commit path without regressing disabled mode. + +Red tests first: + +- Commit returns monotonic token (`epoch:log_index`) for successful writes. +- Replication-disabled mode produces no sidecar append activity. +- Sidecar append failure causes commit failure (no token emitted). +- Commit ordering remains serialized and token order matches commit order under concurrent writers. +- Crash boundary test: token is never returned for non-durable replication frame. + +Green implementation: + +- Hook replication append into `single_file::transaction::commit`. +- Add replication config wiring in open options. 
+- Emit token and expose primary replication status. +- Add basic replication metrics counters/gauges. + +Robustness checks: + +- Regression benchmark: replication off path <3% overhead. +- Negative-path tests for IO errors on sidecar append/fsync. + +Phase exit criteria: + +- All Phase B red tests green. +- Disabled path performance gate passes. +- Durability/token invariant verified by crash-boundary tests. + +### Phase C: Replica bootstrap + steady-state apply + +Objective: + +- Build replica bootstrap/catch-up/apply loop with idempotency and token-wait semantics. + +Red tests first: + +- Replica bootstrap from snapshot reaches exact primary state. +- Incremental catch-up applies committed frames in order. +- Duplicate chunk delivery is idempotent (no double-apply). +- Replica restart resumes from durable cursor without divergence. +- Token wait returns success on catch-up and timeout when lag persists. + +Green implementation: + +- Implement snapshot bootstrap flow and continuity validation. +- Implement pull loop (`cursor`, `max_bytes`, retry/backoff). +- Implement apply pipeline using replay semantics + applied-index persistence. +- Add replica status surface (applied index, lag, last error). + +Robustness checks: + +- Checkpoint interleaving tests (primary background checkpoint while replica catches up). +- Large backlog catch-up throughput and memory boundedness tests. + +Phase exit criteria: + +- All Phase C red tests green. +- Replica apply remains deterministic across restart/retry scenarios. +- Token-wait semantics validated end-to-end. + +### Phase D: Promotion + retention + hardening + +Objective: + +- Add manual promotion with fencing and finalize retention/failure behavior. + +Red tests first: + +- Promotion increments epoch and fences stale primary writes. +- Retention respects min active replica cursor and configured minimum window. +- Missing segment response deterministically marks replica `needs_reseed`. 
+- Lagging replica beyond retention floor requires snapshot reseed and recovers. +- Promotion race cases do not allow split-brain writes. + +Green implementation: + +- Implement manual promote flow and epoch fencing checks. +- Implement replica progress tracking and retention pruning. +- Add explicit reseed path/status when continuity is broken. +- Finalize status/admin interfaces for ops visibility. + +Robustness checks: + +- Fault-injection sweep for corruption/network/partial transfer. +- Soak tests at target topology (`1 + up to 5`) with lag churn. + +Phase exit criteria: + +- All Phase D red tests green. +- No split-brain write acceptance in promotion tests. +- Retention and reseed behavior deterministic and observable. + +## 18) Per-Phase Done Definition + +- Phase-specific red tests were added before implementation. +- Green implementation passed with no skipped phase tests. +- Failure-mode tests for that phase are green. +- Metrics/status fields for that phase are present and documented. +- Phase summary notes include known limits and next-phase carry-over items. + +## 19) Open Questions + +- None blocking V1 scope. +- Locked for V1 gate: + - Commit overhead budget: `P95_MAX_RATIO=1.30` (replication-on p95 / baseline p95). + - ANN default: latency-first IVF-PQ (`residuals=false`, `pq_subspaces=48`, `pq_centroids=256`). + - Authoritative vector replication scope: logical vector property mutations (`SetNodeVector` / `DelNodeVector`). + +## 20) Phase D Summary (February 8, 2026) + +Implemented: + +- Manual promotion API with epoch fencing (`stale primary` rejected on stale writer commit). +- Retention controls (segment rotation threshold + min retained entries) and primary retention execution. +- Time-window retention control (`replication_retention_min_ms`) to avoid pruning very recent segments. +- Replica progress reporting and per-replica lag visibility on primary status. 
+- Deterministic reseed signaling (`needs_reseed`) for retained-floor/continuity breaks. +- Explicit replica reseed API from snapshot. +- Binding parity for replication admin/status in Node NAPI and Python PyO3 surfaces. +- Host-runtime Prometheus replication exporter API in Rust core + Node NAPI + Python PyO3 (`collect_replication_metrics_prometheus*`). +- Host-runtime OpenTelemetry OTLP-JSON replication exporter API in Rust core + Node NAPI + Python PyO3 (`collect_replication_metrics_otel_json*`). +- Host-runtime OpenTelemetry collector push transport (HTTP OTLP-JSON) in Rust core + Node NAPI + Python PyO3 (`push_replication_metrics_otel_json_single_file`, `pushReplicationMetricsOtelJson`, `push_replication_metrics_otel_json`). +- Host-runtime OpenTelemetry OTLP-protobuf replication exporter API in Rust core + Node NAPI + Python PyO3 (`collect_replication_metrics_otel_protobuf*`). +- Host-runtime OpenTelemetry collector push transport (HTTP OTLP-protobuf) in Rust core + Node NAPI + Python PyO3 (`push_replication_metrics_otel_protobuf_single_file`, `pushReplicationMetricsOtelProtobuf`, `push_replication_metrics_otel_protobuf`). +- Host-runtime OpenTelemetry collector push transport (OTLP gRPC Export) in Rust core + Node NAPI + Python PyO3 (`push_replication_metrics_otel_grpc_single_file`, `pushReplicationMetricsOtelGrpc`, `push_replication_metrics_otel_grpc`). +- Host-runtime OTLP transport hardening for TLS/mTLS (HTTPS-only mode, custom CA trust, optional client cert/key auth). 
+- Host-runtime OTLP adaptive retry/backoff/jitter/compression + circuit-breaker controls in Rust core + Node NAPI + Python PyO3 (`adaptive_retry`, `adaptive_retry_mode`, `adaptive_retry_ewma_alpha`, `retry_max_attempts`, `retry_backoff_ms`, `retry_backoff_max_ms`, `retry_jitter_ratio`, `circuit_breaker_failure_threshold`, `circuit_breaker_open_ms`, `circuit_breaker_half_open_probes`, `circuit_breaker_state_path`, `circuit_breaker_state_url`, `circuit_breaker_state_patch`, `circuit_breaker_state_patch_batch`, `circuit_breaker_state_patch_batch_max_keys`, `circuit_breaker_state_patch_merge`, `circuit_breaker_state_patch_merge_max_keys`, `circuit_breaker_state_patch_retry_max_attempts`, `circuit_breaker_state_cas`, `circuit_breaker_state_lease_id`, `circuit_breaker_scope_key`, `compression_gzip`). +- Host-runtime replication transport JSON export surfaces for embedding HTTP endpoints beyond playground runtime: + - snapshot export (`collectReplicationSnapshotTransportJson` / `collect_replication_snapshot_transport_json`) + - log page export with cursor/limits (`collectReplicationLogTransportJson` / `collect_replication_log_transport_json`). + - TypeScript adapter helper (`createReplicationTransportAdapter`) for wiring custom HTTP handlers. + - TypeScript admin auth helper (`createReplicationAdminAuthorizer`) with token/mTLS modes and optional native TLS matcher hook. + - TypeScript Node native TLS matcher helper (`createNodeTlsMtlsMatcher` / `isNodeTlsClientAuthorized`) for common request socket layouts. + - TypeScript forwarded-header TLS matcher helper (`createForwardedTlsMtlsMatcher` / `isForwardedTlsClientAuthorized`) for proxy-terminated TLS/mTLS runtimes beyond Node-native sockets. + - Python admin auth helper (`create_replication_admin_authorizer`) with token/mTLS modes and ASGI native TLS matcher hook (`create_asgi_tls_mtls_matcher` / `is_asgi_tls_client_authorized`). 
+- Polyglot host-runtime HTTP adapter templates: + - Node Express template (`docs/examples/replication_adapter_node_express.ts`) + - Node proxy-forwarded template (`docs/examples/replication_adapter_node_proxy_forwarded.ts`) + - Python FastAPI template (`docs/examples/replication_adapter_python_fastapi.py`) + - generic middleware template (`docs/examples/replication_adapter_generic_middleware.ts`). +- Host-runtime transport/admin flow validation added for both bindings: + - Node AVA test (`ray-rs/__test__/replication_transport_flow.spec.ts`) + - Python pytest test (`ray-rs/python/tests/test_replication_transport_flow.py`). +- Replica source transport hardening in host-runtime open path (required source DB path + source/local sidecar collision fencing). +- Operator runbook for promotion/reseed/retention tuning (`docs/REPLICATION_RUNBOOK.md`). +- V1 release checklist finalized in runbook (`docs/REPLICATION_RUNBOOK.md`, section `10. V1 Release Checklist`) including host-runtime flow gates and release/tag checks. +- Replication benchmark gate script (`ray-rs/scripts/replication-bench-gate.sh`) + benchmark doc wiring. +- Replica catch-up throughput gate (`ray-rs/scripts/replication-catchup-gate.sh`) and combined perf gate (`ray-rs/scripts/replication-perf-gate.sh`). +- Replication soak stability harness + gate (`ray-rs/examples/replication_soak_bench.rs`, `ray-rs/scripts/replication-soak-gate.sh`) covering lag churn, promotion fencing, reseed recovery, and zero-divergence checks. +- Non-blocking replication soak trend tracking in CI (`replication-soak-tracking` job in `.github/workflows/ray-rs.yml`, weekly schedule + manual dispatch with `replication_soak_profile=fast|full`, artifact `replication-soak-tracking-logs`, run-scoped `ci-<run_id>-<run_attempt>` stamp). +- Release preflight script (`ray-rs/scripts/release-preflight.sh`) enforcing commit-message format and tag/package/version alignment. 
+- Main-branch CI perf-gate enforcement in `ray-rs` workflow (`.github/workflows/ray-rs.yml`) with run-scoped replication benchmark log artifact upload (`ci-<run_id>-<run_attempt>` stamp). +- Main-branch CI ANN quality-gate enforcement in `ray-rs` workflow (`.github/workflows/ray-rs.yml`) with ANN gate log artifact upload. +- Vector replication authority decision: canonical vector property mutations replicate (`SetNodeVector`/`DelNodeVector`); derived vector maintenance WAL records are non-authoritative and skipped during replica apply. +- Vector compaction strategy benchmark harness (`ray-rs/examples/vector_compaction_bench.rs`) for ANN/compaction tuning experiments. +- Vector compaction matrix script + baseline snapshot (`ray-rs/scripts/vector-compaction-matrix.sh`, `docs/benchmarks/results/2026-02-08-vector-compaction-*.{txt,csv}`) with recommendation to keep current compaction defaults. +- ANN algorithm benchmark harness + matrix script (`ray-rs/examples/vector_ann_bench.rs`, `ray-rs/scripts/vector-ann-matrix.sh`) with baseline artifact snapshot (`docs/benchmarks/results/2026-02-08-vector-ann-matrix.{txt,csv}`). +- ANN PQ tuning sweep + ANN recall/p95 gate scripts (`ray-rs/scripts/vector-ann-pq-tuning.sh`, `ray-rs/scripts/vector-ann-gate.sh`) with artifact snapshots (`docs/benchmarks/results/2026-02-08-vector-ann-pq-tuning.{txt,csv}`, `docs/benchmarks/results/2026-02-08-vector-ann-gate.attempt*.txt`). +- Latency-first ANN default selection: IVF-PQ (`residuals=false`, `pq_subspaces=48`, `pq_centroids=256`) with CI quality floor via `vector-ann-gate.sh`. +- Non-blocking ANN PQ trend tracking in CI (`ann-pq-tracking` job in `.github/workflows/ray-rs.yml`, weekly schedule + manual dispatch with `ann_pq_profile=fast|full`, artifact `ann-pq-tracking-logs`, run-scoped `ci-<run_id>-<run_attempt>` stamp). 
+- HTTP transport/admin rollout in playground runtime: + - `GET /api/replication/status` + - `GET /api/replication/metrics` (Prometheus text export) + - `GET /api/replication/snapshot/latest` + - `GET /api/replication/log` + - `GET /api/replication/transport/snapshot` (host-runtime transport export passthrough) + - `GET /api/replication/transport/log` (host-runtime transport export passthrough) + - `POST /api/replication/pull` + - `POST /api/replication/reseed` + - `POST /api/replication/promote` + - configurable admin auth via `REPLICATION_ADMIN_AUTH_MODE` (`token|mtls|token_or_mtls|token_and_mtls`). + - native HTTPS listener + TLS client-cert enforcement support for mTLS auth in playground runtime. + +Validated tests: + +- `ray-rs/tests/replication_phase_d.rs` (promotion, retention, reseed, split-brain race). +- `ray-rs/tests/replication_faults_phase_d.rs` (corrupt/truncated segment fault paths + durable `last_error`). + +Known limits: + +- Bundled HTTP admin endpoints currently ship in playground runtime only; host runtime provides JSON export helpers for embedding custom endpoints. +- Host-runtime OTLP export supports HTTP OTLP-JSON, HTTP OTLP-protobuf, and OTLP gRPC push paths. + +Carry-over to next phase: + +- None for OTLP shared-state patch transport hardening in Phase D. + +## 21) Next Steps (Post-Phase-D) + +1. V1 release gate dry-run: + - Execute `ray-rs/scripts/replication-perf-gate.sh` and `ray-rs/scripts/vector-ann-gate.sh` on release-like hardware. + - Capture artifacts under `docs/benchmarks/results/` with a new date stamp. +2. Long-run stability soak: + - Keep running `ray-rs/scripts/replication-soak-gate.sh` in tracking mode (manual/scheduled CI) and tune thresholds from trend data. + - Expanded scenario depth path available via CI manual dispatch profile `replication_soak_profile=full`; continue threshold tuning from trend data on release-like hardware before V1 cut. +3. 
Host runtime adoption pass: + - Keep adapter examples + Node/Python host-runtime flow tests green as API evolves. + - Completed: proxy-terminated deployment sample with forwarded-header mTLS auth checks (`docs/examples/replication_adapter_node_proxy_forwarded.ts`). +4. Release packaging + docs closeout: + - run release checklist (`docs/REPLICATION_RUNBOOK.md`, section `10. V1 Release Checklist`) on release-like hardware. + - cut release commit/tag using release-note/tag rules from `AGENTS.md`. diff --git a/docs/REPLICATION_RUNBOOK.md b/docs/REPLICATION_RUNBOOK.md new file mode 100644 index 0000000..afb8be7 --- /dev/null +++ b/docs/REPLICATION_RUNBOOK.md @@ -0,0 +1,299 @@ +# Replication Operations Runbook (V1) + +Scope: + +- Single-file deployment mode (`.kitedb`) with sidecar replication. +- Roles: one writable primary, one or more replicas. +- APIs available in Rust core, Node NAPI, and Python bindings. + +## 1. Operational Signals + +Primary status fields: + +- `epoch`: current leadership epoch. +- `head_log_index`: latest committed replication log index. +- `retained_floor`: lowest retained index after pruning. +- `replica_lags[]`: per-replica applied position. +- `append_attempts|append_failures|append_successes`: commit-path replication health. + +Replica status fields: + +- `applied_epoch`, `applied_log_index`: durable apply cursor. +- `last_error`: latest pull/apply failure detail. +- `needs_reseed`: continuity break or floor violation; snapshot reseed required. + +Metrics surface: + +- `collect_metrics()` now includes `replication` with role (`primary|replica|disabled`) plus + role-specific replication counters/state for dashboards and alerting. 
+- Host-runtime Prometheus text export is available via: + - Rust core: `collect_replication_metrics_prometheus_single_file(...)` + - Node NAPI: `collectReplicationMetricsPrometheus(db)` + - Python PyO3: `collect_replication_metrics_prometheus(db)` +- Host-runtime OpenTelemetry OTLP-JSON export is available via: + - Rust core: `collect_replication_metrics_otel_json_single_file(...)` + - Node NAPI: `collectReplicationMetricsOtelJson(db)` + - Python PyO3: `collect_replication_metrics_otel_json(db)` +- Host-runtime OpenTelemetry OTLP-protobuf export is available via: + - Rust core: `collect_replication_metrics_otel_protobuf_single_file(...)` + - Node NAPI: `collectReplicationMetricsOtelProtobuf(db)` + - Python PyO3: `collect_replication_metrics_otel_protobuf(db)` +- Host-runtime OpenTelemetry collector push is available via: + - Rust core: `push_replication_metrics_otel_json_single_file(db, endpoint, timeout_ms, bearer_token)` + - advanced TLS/mTLS: `push_replication_metrics_otel_json_*_with_options(...)` with + `https_only`, `ca_cert_pem_path`, `client_cert_pem_path`, `client_key_pem_path`, + `retry_max_attempts`, `retry_backoff_ms`, `retry_backoff_max_ms`, `retry_jitter_ratio`, + `adaptive_retry`, `adaptive_retry_mode`, `adaptive_retry_ewma_alpha`, `circuit_breaker_failure_threshold`, `circuit_breaker_open_ms`, `circuit_breaker_half_open_probes`, + `circuit_breaker_state_path`, `circuit_breaker_state_url`, `circuit_breaker_state_patch`, `circuit_breaker_state_patch_batch`, `circuit_breaker_state_patch_batch_max_keys`, `circuit_breaker_state_patch_merge`, `circuit_breaker_state_patch_merge_max_keys`, `circuit_breaker_state_patch_retry_max_attempts`, `circuit_breaker_state_cas`, `circuit_breaker_state_lease_id`, `circuit_breaker_scope_key`, `compression_gzip`. 
+ - Rust core (protobuf): `push_replication_metrics_otel_protobuf_single_file(db, endpoint, timeout_ms, bearer_token)` + - advanced TLS/mTLS: `push_replication_metrics_otel_protobuf_*_with_options(...)` with + `https_only`, `ca_cert_pem_path`, `client_cert_pem_path`, `client_key_pem_path`, + `retry_max_attempts`, `retry_backoff_ms`, `retry_backoff_max_ms`, `retry_jitter_ratio`, + `adaptive_retry`, `adaptive_retry_mode`, `adaptive_retry_ewma_alpha`, `circuit_breaker_failure_threshold`, `circuit_breaker_open_ms`, `circuit_breaker_half_open_probes`, + `circuit_breaker_state_path`, `circuit_breaker_state_url`, `circuit_breaker_state_patch`, `circuit_breaker_state_patch_batch`, `circuit_breaker_state_patch_batch_max_keys`, `circuit_breaker_state_patch_merge`, `circuit_breaker_state_patch_merge_max_keys`, `circuit_breaker_state_patch_retry_max_attempts`, `circuit_breaker_state_cas`, `circuit_breaker_state_lease_id`, `circuit_breaker_scope_key`, `compression_gzip`. + - Rust core (gRPC): `push_replication_metrics_otel_grpc_single_file(db, endpoint, timeout_ms, bearer_token)` + - advanced TLS/mTLS: `push_replication_metrics_otel_grpc_*_with_options(...)` with + `https_only`, `ca_cert_pem_path`, `client_cert_pem_path`, `client_key_pem_path`, + `retry_max_attempts`, `retry_backoff_ms`, `retry_backoff_max_ms`, `retry_jitter_ratio`, + `adaptive_retry`, `adaptive_retry_mode`, `adaptive_retry_ewma_alpha`, `circuit_breaker_failure_threshold`, `circuit_breaker_open_ms`, `circuit_breaker_half_open_probes`, + `circuit_breaker_state_path`, `circuit_breaker_state_url`, `circuit_breaker_state_patch`, `circuit_breaker_state_patch_batch`, `circuit_breaker_state_patch_batch_max_keys`, `circuit_breaker_state_patch_merge`, `circuit_breaker_state_patch_merge_max_keys`, `circuit_breaker_state_patch_retry_max_attempts`, `circuit_breaker_state_cas`, `circuit_breaker_state_lease_id`, `circuit_breaker_scope_key`, `compression_gzip`. 
+ - Node NAPI: `pushReplicationMetricsOtelJson(db, endpoint, timeoutMs, bearerToken?)` + - advanced TLS/mTLS: `pushReplicationMetricsOtelJsonWithOptions(db, endpoint, options)`. + - Node NAPI (protobuf): `pushReplicationMetricsOtelProtobuf(db, endpoint, timeoutMs, bearerToken?)` + - advanced TLS/mTLS: `pushReplicationMetricsOtelProtobufWithOptions(db, endpoint, options)`. + - Node NAPI (gRPC): `pushReplicationMetricsOtelGrpc(db, endpoint, timeoutMs, bearerToken?)` + - advanced TLS/mTLS: `pushReplicationMetricsOtelGrpcWithOptions(db, endpoint, options)`. + - Python PyO3: `push_replication_metrics_otel_json(db, endpoint, timeout_ms=5000, bearer_token=None)` + - advanced TLS/mTLS kwargs: + `https_only`, `ca_cert_pem_path`, `client_cert_pem_path`, `client_key_pem_path`, + `retry_max_attempts`, `retry_backoff_ms`, `retry_backoff_max_ms`, `retry_jitter_ratio`, + `adaptive_retry`, `adaptive_retry_mode`, `adaptive_retry_ewma_alpha`, `circuit_breaker_failure_threshold`, `circuit_breaker_open_ms`, `circuit_breaker_half_open_probes`, + `circuit_breaker_state_path`, `circuit_breaker_state_url`, `circuit_breaker_state_patch`, `circuit_breaker_state_patch_batch`, `circuit_breaker_state_patch_batch_max_keys`, `circuit_breaker_state_patch_merge`, `circuit_breaker_state_patch_merge_max_keys`, `circuit_breaker_state_patch_retry_max_attempts`, `circuit_breaker_state_cas`, `circuit_breaker_state_lease_id`, `circuit_breaker_scope_key`, `compression_gzip`. 
+ - Python PyO3 (protobuf): `push_replication_metrics_otel_protobuf(db, endpoint, timeout_ms=5000, bearer_token=None)` + - advanced TLS/mTLS kwargs: + `https_only`, `ca_cert_pem_path`, `client_cert_pem_path`, `client_key_pem_path`, + `retry_max_attempts`, `retry_backoff_ms`, `retry_backoff_max_ms`, `retry_jitter_ratio`, + `adaptive_retry`, `adaptive_retry_mode`, `adaptive_retry_ewma_alpha`, `circuit_breaker_failure_threshold`, `circuit_breaker_open_ms`, `circuit_breaker_half_open_probes`, + `circuit_breaker_state_path`, `circuit_breaker_state_url`, `circuit_breaker_state_patch`, `circuit_breaker_state_patch_batch`, `circuit_breaker_state_patch_batch_max_keys`, `circuit_breaker_state_patch_merge`, `circuit_breaker_state_patch_merge_max_keys`, `circuit_breaker_state_patch_retry_max_attempts`, `circuit_breaker_state_cas`, `circuit_breaker_state_lease_id`, `circuit_breaker_scope_key`, `compression_gzip`. + - Python PyO3 (gRPC): `push_replication_metrics_otel_grpc(db, endpoint, timeout_ms=5000, bearer_token=None)` + - advanced TLS/mTLS kwargs: + `https_only`, `ca_cert_pem_path`, `client_cert_pem_path`, `client_key_pem_path`, + `retry_max_attempts`, `retry_backoff_ms`, `retry_backoff_max_ms`, `retry_jitter_ratio`, + `adaptive_retry`, `adaptive_retry_mode`, `adaptive_retry_ewma_alpha`, `circuit_breaker_failure_threshold`, `circuit_breaker_open_ms`, `circuit_breaker_half_open_probes`, + `circuit_breaker_state_path`, `circuit_breaker_state_url`, `circuit_breaker_state_patch`, `circuit_breaker_state_patch_batch`, `circuit_breaker_state_patch_batch_max_keys`, `circuit_breaker_state_patch_merge`, `circuit_breaker_state_patch_merge_max_keys`, `circuit_breaker_state_patch_retry_max_attempts`, `circuit_breaker_state_cas`, `circuit_breaker_state_lease_id`, `circuit_breaker_scope_key`, `compression_gzip`. + - Note: `circuit_breaker_state_path` and `circuit_breaker_state_url` are mutually exclusive. 
+ - Note: `circuit_breaker_state_patch`, `circuit_breaker_state_patch_batch`, `circuit_breaker_state_patch_batch_max_keys`, `circuit_breaker_state_patch_merge`, `circuit_breaker_state_patch_merge_max_keys`, `circuit_breaker_state_patch_retry_max_attempts`, `circuit_breaker_state_cas`, and `circuit_breaker_state_lease_id` require `circuit_breaker_state_url`. +- Host-runtime replication transport JSON export helpers are available via: + - Node NAPI: `collectReplicationSnapshotTransportJson(db, includeData?)`, + `collectReplicationLogTransportJson(db, cursor?, maxFrames?, maxBytes?, includePayload?)` + - TypeScript adapter helper: `createReplicationTransportAdapter(db)` in `ray-rs/ts/replication_transport.ts` + - TypeScript admin auth helper: `createReplicationAdminAuthorizer({ mode, token, mtlsHeader, mtlsSubjectRegex, mtlsMatcher? })` + for `none|token|mtls|token_or_mtls|token_and_mtls` with optional native TLS verifier hook (`mtlsMatcher`). + - TypeScript native TLS matcher helper: `createNodeTlsMtlsMatcher({ requirePeerCertificate? })` + and probe helper `isNodeTlsClientAuthorized(request, options?)` for common Node request socket shapes + (`request.socket`, `request.client`, `request.raw.socket`, `request.req.socket`). + - TypeScript forwarded-header matcher helper: `createForwardedTlsMtlsMatcher({ requirePeerCertificate?, requireVerifyHeader?, verifyHeaders?, certHeaders?, successValues? })` + and probe helper `isForwardedTlsClientAuthorized(request, options?)` for proxy-terminated TLS/mTLS in non-Node-native runtimes. + - Python PyO3: `collect_replication_snapshot_transport_json(db, include_data=False)`, + `collect_replication_log_transport_json(db, cursor=None, max_frames=128, max_bytes=1048576, include_payload=True)` + - Python host auth helper: `create_replication_admin_authorizer(...)` with `ReplicationAdminAuthConfig` + and ASGI native TLS matcher helpers `create_asgi_tls_mtls_matcher(...)` / `is_asgi_tls_client_authorized(...)`. 
+ - These are intended for embedding host-side HTTP endpoints beyond playground runtime. + - Template files: + - Node Express adapter: `docs/examples/replication_adapter_node_express.ts` + - Node proxy-forwarded adapter: `docs/examples/replication_adapter_node_proxy_forwarded.ts` + - Python FastAPI adapter: `docs/examples/replication_adapter_python_fastapi.py` + - Generic middleware adapter: `docs/examples/replication_adapter_generic_middleware.ts` + +Alert heuristics: + +- `append_failures > 0` growing: primary sidecar durability issue. +- Replica lag growth over steady traffic: pull/apply bottleneck. +- `needs_reseed == true`: force reseed, do not keep retrying catch-up. + +## 2. Bootstrap a New Replica + +Prerequisite: + +- Quiesce writes on the source primary during `replica_bootstrap_from_snapshot()`. +- If writes continue, bootstrap now fails fast with a `quiesce writes and retry` error. + +1. Open replica with: + - `replication_role=replica` + - `replication_source_db_path` + - `replication_source_sidecar_path` + - Validation hardening: + - source DB path is required and must exist as a file, + - source DB path must differ from replica DB path, + - source sidecar path must differ from local replica sidecar path. +2. Call `replica_bootstrap_from_snapshot()`. +3. Start catch-up loop with `replica_catch_up_once(max_frames)`. +4. Validate `needs_reseed == false` and `last_error == null`. + +## 3. Routine Catch-up + Retention + +Replica: + +- Poll `replica_catch_up_once(max_frames)` repeatedly. +- Persist and monitor `applied_log_index`. + +Primary: + +- Report each replica cursor via `primary_report_replica_progress(replica_id, epoch, applied_log_index)`. +- Run `primary_run_retention()` on an operator cadence. + +Tuning: + +- `replication_retention_min_entries`: set above worst-case expected replica lag. +- `replication_retention_min_ms`: keep recent segments for at least this wall-clock window. 
+- `replication_segment_max_bytes`: larger segments reduce file churn; smaller segments prune faster. + +## 4. Manual Promotion Procedure + +Goal: move write authority to a target node without split-brain writes. + +1. Quiesce writes on old primary (application-level write freeze). +2. Promote target primary: + - `primary_promote_to_next_epoch()`. +3. Verify: + - new primary status `epoch` incremented, + - new writes return tokens in the new epoch. +4. Confirm stale fence: + - old primary write attempts fail with stale-primary error. +5. Repoint replicas to the promoted primary source paths. + +## 5. Reseed Procedure (`needs_reseed`) + +Trigger: + +- Replica status sets `needs_reseed=true`, usually from retained-floor/continuity break. + +Steps: + +1. Stop normal catch-up loop for that replica. +2. Quiesce writes on the source primary. +3. Execute `replica_reseed_from_snapshot()`. +4. Resume `replica_catch_up_once(...)`. +5. Verify: + - `needs_reseed=false`, + - `last_error` cleared, + - data parity checks (counts and spot checks) pass. + +## 6. Failure Handling + +Corrupt/truncated segment: + +- Symptom: catch-up error + replica `last_error` set. +- Action: reseed replica from snapshot. + +Retention floor outran replica: + +- Symptom: catch-up error mentions reseed/floor; `needs_reseed=true`. +- Action: reseed; increase `replication_retention_min_entries` if frequent. + +Promotion race / split-brain suspicion: + +- Symptom: concurrent promote/write attempts. +- Expected: exactly one writer succeeds post-promotion. +- Action: treat stale-writer failures as correct fencing; ensure client routing points to current epoch primary. + +## 7. 
Validation Checklist + +Before rollout: + +- `cargo test --no-default-features --test replication_phase_a --test replication_phase_b --test replication_phase_c --test replication_phase_d --test replication_faults_phase_d` +- `cargo test --no-default-features replication::` + +Perf gate: + +- Run `ray-rs/scripts/replication-perf-gate.sh`. +- Commit overhead gate: require median p95 ratio (replication-on / baseline) within `P95_MAX_RATIO` (default `1.30`, `ATTEMPTS=7`). +- Catch-up gate: require replica throughput floors (`MIN_CATCHUP_FPS`, `MIN_THROUGHPUT_RATIO`). +- Catch-up gate retries benchmark noise by default (`ATTEMPTS=3`); increase on busy dev machines. +- CI on `main` (`.github/workflows/ray-rs.yml`) enforces replication perf gate and uploads benchmark logs as `replication-perf-gate-logs` (run-scoped `ci-<run_id>-<run_attempt>` stamp). +- CI also runs non-blocking replication soak tracking weekly and supports manual deep runs via workflow input `replication_soak_profile=fast|full` (artifact `replication-soak-tracking-logs`). + +## 8. HTTP Admin Endpoints (Playground Runtime) + +Available endpoints in `playground/src/api/routes.ts`: + +- `GET /api/replication/status` +- `GET /api/replication/metrics` (Prometheus text format) +- `GET /api/replication/snapshot/latest` +- `GET /api/replication/log` +- `GET /api/replication/transport/snapshot` (host-runtime transport export passthrough) +- `GET /api/replication/transport/log` (host-runtime transport export passthrough) +- `POST /api/replication/pull` (runs `replica_catch_up_once`) +- `POST /api/replication/reseed` (runs `replica_reseed_from_snapshot`) +- `POST /api/replication/promote` (runs `primary_promote_to_next_epoch`) + +Auth: + +- `REPLICATION_ADMIN_AUTH_MODE` controls admin auth: + - `none` (no admin auth) + - `token` (Bearer token) + - `mtls` (mTLS client-cert header) + - `token_or_mtls` + - `token_and_mtls` +- Token modes use `REPLICATION_ADMIN_TOKEN`. 
+- mTLS modes read `REPLICATION_MTLS_HEADER` (default `x-forwarded-client-cert`) and optional + subject filter `REPLICATION_MTLS_SUBJECT_REGEX`. +- Native TLS mTLS mode can be enabled with `REPLICATION_MTLS_NATIVE_TLS=true` when the + playground listener is configured with: + - `PLAYGROUND_TLS_CERT_FILE`, `PLAYGROUND_TLS_KEY_FILE` (HTTPS enablement) + - `PLAYGROUND_TLS_REQUEST_CERT=true` + - `PLAYGROUND_TLS_REJECT_UNAUTHORIZED=true` + - optional `PLAYGROUND_TLS_CA_FILE` for custom client-cert trust roots +- `REPLICATION_MTLS_SUBJECT_REGEX` applies to header-based mTLS values; native TLS mode + validates client cert handshake presence, not subject matching. +- `metrics`, `snapshot`, `log`, `pull`, `reseed`, and `promote` enforce the selected mode. +- `status` is read-only and does not require auth. + +Playground curl examples: + +- `export BASE="http://localhost:3000"` +- `curl "$BASE/api/replication/status"` +- `curl -H "Authorization: Bearer $REPLICATION_ADMIN_TOKEN" "$BASE/api/replication/metrics"` +- `curl -H "Authorization: Bearer $REPLICATION_ADMIN_TOKEN" "$BASE/api/replication/log?maxFrames=128&maxBytes=1048576"` +- `curl -X POST -H "Authorization: Bearer $REPLICATION_ADMIN_TOKEN" -H "Content-Type: application/json" -d '{"maxFrames":256}' "$BASE/api/replication/pull"` +- `curl -X POST -H "Authorization: Bearer $REPLICATION_ADMIN_TOKEN" "$BASE/api/replication/reseed"` +- `curl -X POST -H "Authorization: Bearer $REPLICATION_ADMIN_TOKEN" "$BASE/api/replication/promote"` +- `curl -H "x-client-cert: CN=allowed-client,O=RayDB" "$BASE/api/replication/metrics"` (when `REPLICATION_ADMIN_AUTH_MODE=mtls`) + +## 9. Known V1 Limits + +- Retention policy supports entry-window + time-window floors, but not richer SLA-aware policies. +- Bundled HTTP admin endpoints still ship in playground runtime; host runtime now exposes transport JSON helpers for embedding custom HTTP surfaces. 
+- OTLP retry policy is bounded attempt/backoff/jitter with optional adaptive multiplier (`linear` or `ewma`) and circuit-breaker half-open probes. Circuit-breaker state is process-local by default; optional file-backed sharing (`circuit_breaker_state_path`) or shared HTTP store (`circuit_breaker_state_url`) is available with `circuit_breaker_scope_key`; URL backend can enable key-scoped patch mode (`circuit_breaker_state_patch`), batched patch mode (`circuit_breaker_state_patch_batch` with `circuit_breaker_state_patch_batch_max_keys`), compacting merge patch mode (`circuit_breaker_state_patch_merge` with `circuit_breaker_state_patch_merge_max_keys`), bounded patch retries (`circuit_breaker_state_patch_retry_max_attempts`), CAS (`circuit_breaker_state_cas`), and lease header propagation (`circuit_breaker_state_lease_id`). +- Vector authority boundary: logical vector property mutations (`SetNodeVector` / `DelNodeVector`) are authoritative and replicated; vector batch/fragment maintenance records are treated as derived index artifacts and are skipped during replica apply. +- `SyncMode::Normal` and `SyncMode::Off` optimize commit latency by batching sidecar frame writes in-memory and refreshing manifest fencing periodically (not every commit). For strict per-commit sidecar visibility/fencing, use `SyncMode::Full`. + +## 10. V1 Release Checklist + +1. Correctness gate: + - `cd ray-rs && cargo test --no-default-features --test replication_phase_a --test replication_phase_b --test replication_phase_c --test replication_phase_d --test replication_faults_phase_d` +2. Host-runtime flow gate: + - `cd ray-rs && bunx ava __test__/replication_transport_auth.spec.ts __test__/replication_transport_flow.spec.ts` + - `cd ray-rs && .venv/bin/python -m pytest -q python/tests/test_replication_auth.py python/tests/test_replication_transport_flow.py` +3. 
Performance gate (release-like host): + - `cd ray-rs && ./scripts/replication-perf-gate.sh` + - `cd ray-rs && ./scripts/replication-soak-gate.sh` + - `cd ray-rs && ./scripts/vector-ann-gate.sh` +4. Artifact capture: + - ensure benchmark logs are written under `docs/benchmarks/results/` with a dedicated `STAMP` for the release run. +5. Release preflight checks (AGENTS rules): + - `cd ray-rs && ./scripts/release-preflight.sh --commit-msg \"core: X.Y.Z\" --tag vX.Y.Z` + - This enforces: + - exact commit message format `all|js|ts|py|rs|core: X.Y.Z` (no trailing text), + - tag format `vX.Y.Z`, + - `ray-rs/package.json` version == tag version, + - commit message version == tag version. +6. Cut release commit and tag: + - commit message must be exactly one of: + - `all: X.Y.Z` + - `js: X.Y.Z` + - `ts: X.Y.Z` + - `py: X.Y.Z` + - `rs: X.Y.Z` + - `core: X.Y.Z` + - then create tag `vX.Y.Z` and push commit + tag. diff --git a/docs/benchmarks/results/2026-02-05-index-pipeline-hypothesis-embed200.txt b/docs/benchmarks/results/2026-02-05-index-pipeline-hypothesis-embed200.txt new file mode 100644 index 0000000..c82ecb1 --- /dev/null +++ b/docs/benchmarks/results/2026-02-05-index-pipeline-hypothesis-embed200.txt @@ -0,0 +1,44 @@ +================================================================== +Index Pipeline Hypothesis Benchmark +================================================================== +Mode: Both +Changes: 200 +Working set: 200 +Vector dims: 128 +Parse latency: tree-sitter=2ms scip=6ms +Embed latency: 200ms per batch +Embed batching: size=32 flush=20ms inflight=4 +Vector apply batch size: 64 +WAL size: 1073741824 bytes +Sync mode: Normal +Group commit: false (window 2ms) +Auto-checkpoint: false +Seed: 42 +================================================================== + +--- sequential --- +Changes: 200 +Vectors applied: 200 +Hot path elapsed: 42.477s +Total elapsed: 42.679s +Hot path rate: 4.71/s +End-to-end rate: 4.69/s +Hot path latency: p50=10.04ms p95=10.22ms 
p99=10.98ms +Vector freshness: p50=204.09ms p95=205.11ms p99=206.13ms + +--- parallel --- +Changes: 200 +Vectors applied: 189 +Hot path elapsed: 1.448s +Total elapsed: 1.687s +Hot path rate: 138.14/s +End-to-end rate: 118.56/s +Hot path latency: p50=7.54ms p95=7.60ms p99=7.65ms +Vector freshness: p50=520.38ms p95=775.61ms p99=845.95ms +Queue: enqueued=200 replaced=11 (5.50%) max_depth=23 avg_depth=8.58 + +=== Comparison (sequential vs parallel) === +Hot path elapsed speedup: 29.34x +End-to-end elapsed speedup: 25.30x +Hot p95: 10.22ms -> 7.60ms +Freshness p95: 205.11ms -> 775.61ms diff --git a/docs/benchmarks/results/2026-02-05-index-pipeline-hypothesis-embed50.txt b/docs/benchmarks/results/2026-02-05-index-pipeline-hypothesis-embed50.txt new file mode 100644 index 0000000..18da4c3 --- /dev/null +++ b/docs/benchmarks/results/2026-02-05-index-pipeline-hypothesis-embed50.txt @@ -0,0 +1,44 @@ +================================================================== +Index Pipeline Hypothesis Benchmark +================================================================== +Mode: Both +Changes: 200 +Working set: 200 +Vector dims: 128 +Parse latency: tree-sitter=1ms scip=1ms +Embed latency: 50ms per batch +Embed batching: size=32 flush=20ms inflight=4 +Vector apply batch size: 64 +WAL size: 1073741824 bytes +Sync mode: Normal +Group commit: false (window 2ms) +Auto-checkpoint: false +Seed: 42 +================================================================== + +--- sequential --- +Changes: 200 +Vectors applied: 200 +Hot path elapsed: 11.260s +Total elapsed: 11.314s +Hot path rate: 17.76/s +End-to-end rate: 17.68/s +Hot path latency: p50=2.57ms p95=2.64ms p99=2.71ms +Vector freshness: p50=54.87ms p95=55.09ms p99=55.15ms + +--- parallel --- +Changes: 200 +Vectors applied: 188 +Hot path elapsed: 0.255s +Total elapsed: 0.329s +Hot path rate: 783.55/s +End-to-end rate: 607.46/s +Hot path latency: p50=1.27ms p95=1.30ms p99=1.35ms +Vector freshness: p50=123.01ms p95=168.43ms 
p99=181.80ms +Queue: enqueued=200 replaced=12 (6.00%) max_depth=34 avg_depth=13.29 + +=== Comparison (sequential vs parallel) === +Hot path elapsed speedup: 44.11x +End-to-end elapsed speedup: 34.36x +Hot p95: 2.64ms -> 1.30ms +Freshness p95: 55.09ms -> 168.43ms diff --git a/docs/benchmarks/results/2026-02-08-replication-catchup-gate.attempt1.txt b/docs/benchmarks/results/2026-02-08-replication-catchup-gate.attempt1.txt new file mode 100644 index 0000000..e50acaa --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-catchup-gate.attempt1.txt @@ -0,0 +1,14 @@ +replication_catchup_bench +sync_mode: normal +seed_commits: 1000 +backlog_commits: 3000 +max_frames: 256 +applied_frames: 3234 +catchup_loops: 13 +produce_elapsed_ms: 142.064 +catchup_elapsed_ms: 240.940 +primary_frames_per_sec: 21117.31 +catchup_frames_per_sec: 13422.42 +throughput_ratio: 0.6356 +primary_head_log_index: 4000 +replica_applied: 1:4000 diff --git a/docs/benchmarks/results/2026-02-08-replication-catchup-gate.attempt2.txt b/docs/benchmarks/results/2026-02-08-replication-catchup-gate.attempt2.txt new file mode 100644 index 0000000..d1c5fe7 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-catchup-gate.attempt2.txt @@ -0,0 +1,14 @@ +replication_catchup_bench +sync_mode: normal +seed_commits: 1000 +backlog_commits: 5000 +max_frames: 256 +applied_frames: 5234 +catchup_loops: 21 +produce_elapsed_ms: 204.601 +catchup_elapsed_ms: 2976.828 +primary_frames_per_sec: 24437.78 +catchup_frames_per_sec: 1758.25 +throughput_ratio: 0.0719 +primary_head_log_index: 6000 +replica_applied: 1:6000 diff --git a/docs/benchmarks/results/2026-02-08-replication-catchup-gate.attempt3.txt b/docs/benchmarks/results/2026-02-08-replication-catchup-gate.attempt3.txt new file mode 100644 index 0000000..b474831 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-catchup-gate.attempt3.txt @@ -0,0 +1,14 @@ +replication_catchup_bench +sync_mode: normal +seed_commits: 1000 +backlog_commits: 
5000 +max_frames: 256 +applied_frames: 5234 +catchup_loops: 21 +produce_elapsed_ms: 216.199 +catchup_elapsed_ms: 2826.335 +primary_frames_per_sec: 23126.87 +catchup_frames_per_sec: 1851.87 +throughput_ratio: 0.0801 +primary_head_log_index: 6000 +replica_applied: 1:6000 diff --git a/docs/benchmarks/results/2026-02-08-replication-catchup-gate.txt b/docs/benchmarks/results/2026-02-08-replication-catchup-gate.txt new file mode 100644 index 0000000..c2c2f4d --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-catchup-gate.txt @@ -0,0 +1,14 @@ +replication_catchup_bench +sync_mode: normal +seed_commits: 1000 +backlog_commits: 5000 +max_frames: 256 +applied_frames: 5234 +catchup_loops: 21 +produce_elapsed_ms: 285.311 +catchup_elapsed_ms: 1837.411 +primary_frames_per_sec: 17524.76 +catchup_frames_per_sec: 2848.57 +throughput_ratio: 0.1625 +primary_head_log_index: 6000 +replica_applied: 1:6000 diff --git a/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt1.txt b/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt1.txt new file mode 100644 index 0000000..4922524 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt1.txt @@ -0,0 +1,56 @@ +======================================================================================================================== +Single-file Raw Benchmark (Rust) +======================================================================================================================== +Nodes: 10,000 +Edges: 0 +Edge types: 1 +Edge props: 0 +Iterations: 10,000 +WAL size: 67,108,864 bytes +Sync mode: Normal +Group commit: false (window 2ms) +Auto-checkpoint: false +Checkpoint threshold: 0.8 +Vector dims: 128 +Vector count: 0 +Replication primary: false +Replication segment max bytes: 1,073,741,824 +Skip checkpoint: false +Reopen read-only: false +======================================================================================================================== + 
+[1/6] Building graph... + Creating nodes... + Created 5000 / 10000 nodes Created 10000 / 10000 nodes + Creating edges... + + Built in 13ms + +[2/6] Vector setup... + +--- Vector Operations --- + Skipped (vector_count/vector_dims == 0) + +[3/6] Checkpointing... + Checkpointed in 16ms + +[4/6] Key lookup benchmarks... + +--- Key Lookups (node_by_key) --- +Random existing keys p50= 583ns p95= 1.12us p99= 1.62us max= 604.96us (1319551 ops/sec) + +[5/6] Traversal and edge benchmarks... + +--- 1-Hop Traversals (out) --- +Random nodes p50= 83ns p95= 167ns p99= 291ns max= 211.58us (8403044 ops/sec) + +--- Edge Exists --- +Random edge exists p50= 84ns p95= 333ns p99= 541ns max= 453.50us (4293244 ops/sec) + +[6/6] Write benchmarks... + +--- Batch Writes (100 nodes) --- +Batch of 100 nodes p50= 49.62us p95= 147.79us p99= 758.17us max= 758.17us (12292 ops/sec) + +--- Batch Writes (100 edges) --- +Batch of 100 edges p50= 106.25us p95= 224.42us p99= 2.74ms max= 2.74ms (5969 ops/sec) diff --git a/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt2.txt b/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt2.txt new file mode 100644 index 0000000..02466b0 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt2.txt @@ -0,0 +1,56 @@ +======================================================================================================================== +Single-file Raw Benchmark (Rust) +======================================================================================================================== +Nodes: 10,000 +Edges: 0 +Edge types: 1 +Edge props: 0 +Iterations: 10,000 +WAL size: 67,108,864 bytes +Sync mode: Normal +Group commit: false (window 2ms) +Auto-checkpoint: false +Checkpoint threshold: 0.8 +Vector dims: 128 +Vector count: 0 +Replication primary: false +Replication segment max bytes: 1,073,741,824 +Skip checkpoint: false +Reopen read-only: false 
+======================================================================================================================== + +[1/6] Building graph... + Creating nodes... + Created 5000 / 10000 nodes Created 10000 / 10000 nodes + Creating edges... + + Built in 7ms + +[2/6] Vector setup... + +--- Vector Operations --- + Skipped (vector_count/vector_dims == 0) + +[3/6] Checkpointing... + Checkpointed in 6ms + +[4/6] Key lookup benchmarks... + +--- Key Lookups (node_by_key) --- +Random existing keys p50= 125ns p95= 375ns p99= 750ns max= 511.00us (4945151 ops/sec) + +[5/6] Traversal and edge benchmarks... + +--- 1-Hop Traversals (out) --- +Random nodes p50= 83ns p95= 84ns p99= 84ns max= 47.08us (13726553 ops/sec) + +--- Edge Exists --- +Random edge exists p50= 42ns p95= 84ns p99= 84ns max= 1.17us (17429893 ops/sec) + +[6/6] Write benchmarks... + +--- Batch Writes (100 nodes) --- +Batch of 100 nodes p50= 42.58us p95= 74.62us p99= 104.83us max= 104.83us (21755 ops/sec) + +--- Batch Writes (100 edges) --- +Batch of 100 edges p50= 39.54us p95= 144.88us p99= 152.25us max= 152.25us (19379 ops/sec) diff --git a/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt3.txt b/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt3.txt new file mode 100644 index 0000000..9a057f2 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt3.txt @@ -0,0 +1,56 @@ +======================================================================================================================== +Single-file Raw Benchmark (Rust) +======================================================================================================================== +Nodes: 10,000 +Edges: 0 +Edge types: 1 +Edge props: 0 +Iterations: 10,000 +WAL size: 67,108,864 bytes +Sync mode: Normal +Group commit: false (window 2ms) +Auto-checkpoint: false +Checkpoint threshold: 0.8 +Vector dims: 128 +Vector count: 0 +Replication primary: false +Replication segment max 
bytes: 1,073,741,824 +Skip checkpoint: false +Reopen read-only: false +======================================================================================================================== + +[1/6] Building graph... + Creating nodes... + Created 5000 / 10000 nodes Created 10000 / 10000 nodes + Creating edges... + + Built in 5ms + +[2/6] Vector setup... + +--- Vector Operations --- + Skipped (vector_count/vector_dims == 0) + +[3/6] Checkpointing... + Checkpointed in 7ms + +[4/6] Key lookup benchmarks... + +--- Key Lookups (node_by_key) --- +Random existing keys p50= 84ns p95= 250ns p99= 542ns max= 454.92us (6199974 ops/sec) + +[5/6] Traversal and edge benchmarks... + +--- 1-Hop Traversals (out) --- +Random nodes p50= 83ns p95= 208ns p99= 250ns max= 53.83us (8844071 ops/sec) + +--- Edge Exists --- +Random edge exists p50= 42ns p95= 84ns p99= 84ns max= 750ns (17197466 ops/sec) + +[6/6] Write benchmarks... + +--- Batch Writes (100 nodes) --- +Batch of 100 nodes p50= 38.62us p95= 51.62us p99= 117.67us max= 117.67us (24511 ops/sec) + +--- Batch Writes (100 edges) --- +Batch of 100 edges p50= 40.83us p95= 122.79us p99= 276.25us max= 276.25us (18851 ops/sec) diff --git a/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt4.txt b/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt4.txt new file mode 100644 index 0000000..2422fb5 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt4.txt @@ -0,0 +1,60 @@ +======================================================================================================================== +Single-file Raw Benchmark (Rust) +======================================================================================================================== +Nodes: 10,000 +Edges: 50,000 +Edge types: 3 +Edge props: 10 +Iterations: 20,000 +WAL size: 67,108,864 bytes +Sync mode: Normal +Group commit: false (window 2ms) +Auto-checkpoint: false +Checkpoint threshold: 0.8 +Vector dims: 128 
+Vector count: 1,000 +Replication primary: false +Skip checkpoint: false +Reopen read-only: false +======================================================================================================================== + +[1/6] Building graph... + Creating nodes... + Created 5000 / 10000 nodes Created 10000 / 10000 nodes + Creating edges... + Created 5000 / 50000 edges Created 10000 / 50000 edges Created 15000 / 50000 edges Created 20000 / 50000 edges Created 25000 / 50000 edges Created 30000 / 50000 edges Created 35000 / 50000 edges Created 40000 / 50000 edges Created 45000 / 50000 edges Created 50000 / 50000 edges + Built in 119ms + +[2/6] Vector setup... + +--- Vector Operations --- +Set vectors (batch 100) p50= 130.38us p95= 340.00us p99= 340.00us max= 340.00us (5849 ops/sec) + +[3/6] Checkpointing... + Checkpointed in 126ms + +[4/6] Key lookup benchmarks... + +--- Key Lookups (node_by_key) --- +Random existing keys p50= 125ns p95= 167ns p99= 292ns max= 389.17us (7426088 ops/sec) + +[5/6] Traversal and edge benchmarks... + +--- 1-Hop Traversals (out) --- +Random nodes p50= 208ns p95= 334ns p99= 625ns max= 531.71us (3809527 ops/sec) + +--- Edge Exists --- +Random edge exists p50= 83ns p95= 125ns p99= 250ns max= 16.46us (10312101 ops/sec) +node_vector() random p50= 125ns p95= 125ns p99= 291ns max= 2.25us (9011197 ops/sec) +has_node_vector() random p50= 83ns p95= 84ns p99= 84ns max= 7.12us (14872885 ops/sec) + +[6/6] Write benchmarks... 
+ +--- Batch Writes (100 nodes) --- +Batch of 100 nodes p50= 39.21us p95= 100.50us p99= 184.12us max= 184.12us (19739 ops/sec) + +--- Batch Writes (100 edges) --- +Batch of 100 edges p50= 43.42us p95= 59.08us p99= 140.33us max= 140.33us (22161 ops/sec) + +--- Batch Writes (100 edges + props) --- +Batch of 100 edges + props p50= 176.58us p95= 277.71us p99= 354.46us max= 354.46us (5273 ops/sec) diff --git a/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt5.txt b/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt5.txt new file mode 100644 index 0000000..f69f36c --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt5.txt @@ -0,0 +1,60 @@ +======================================================================================================================== +Single-file Raw Benchmark (Rust) +======================================================================================================================== +Nodes: 10,000 +Edges: 50,000 +Edge types: 3 +Edge props: 10 +Iterations: 20,000 +WAL size: 67,108,864 bytes +Sync mode: Normal +Group commit: false (window 2ms) +Auto-checkpoint: false +Checkpoint threshold: 0.8 +Vector dims: 128 +Vector count: 1,000 +Replication primary: false +Skip checkpoint: false +Reopen read-only: false +======================================================================================================================== + +[1/6] Building graph... + Creating nodes... + Created 5000 / 10000 nodes Created 10000 / 10000 nodes + Creating edges... + Created 5000 / 50000 edges Created 10000 / 50000 edges Created 15000 / 50000 edges Created 20000 / 50000 edges Created 25000 / 50000 edges Created 30000 / 50000 edges Created 35000 / 50000 edges Created 40000 / 50000 edges Created 45000 / 50000 edges Created 50000 / 50000 edges + Built in 130ms + +[2/6] Vector setup... 
+ +--- Vector Operations --- +Set vectors (batch 100) p50= 110.42us p95= 236.75us p99= 236.75us max= 236.75us (7750 ops/sec) + +[3/6] Checkpointing... + Checkpointed in 130ms + +[4/6] Key lookup benchmarks... + +--- Key Lookups (node_by_key) --- +Random existing keys p50= 125ns p95= 209ns p99= 375ns max= 416.42us (6738113 ops/sec) + +[5/6] Traversal and edge benchmarks... + +--- 1-Hop Traversals (out) --- +Random nodes p50= 208ns p95= 334ns p99= 709ns max= 486.71us (3715762 ops/sec) + +--- Edge Exists --- +Random edge exists p50= 84ns p95= 125ns p99= 167ns max= 7.04us (10263595 ops/sec) +node_vector() random p50= 125ns p95= 208ns p99= 334ns max= 35.08us (7755614 ops/sec) +has_node_vector() random p50= 83ns p95= 84ns p99= 84ns max= 7.92us (14685787 ops/sec) + +[6/6] Write benchmarks... + +--- Batch Writes (100 nodes) --- +Batch of 100 nodes p50= 38.29us p95= 77.29us p99= 107.71us max= 107.71us (23275 ops/sec) + +--- Batch Writes (100 edges) --- +Batch of 100 edges p50= 42.83us p95= 103.88us p99= 135.21us max= 135.21us (19711 ops/sec) + +--- Batch Writes (100 edges + props) --- +Batch of 100 edges + props p50= 180.29us p95= 236.33us p99= 244.58us max= 244.58us (5484 ops/sec) diff --git a/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt6.txt b/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt6.txt new file mode 100644 index 0000000..b600e5e --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt6.txt @@ -0,0 +1,60 @@ +======================================================================================================================== +Single-file Raw Benchmark (Rust) +======================================================================================================================== +Nodes: 10,000 +Edges: 50,000 +Edge types: 3 +Edge props: 10 +Iterations: 20,000 +WAL size: 67,108,864 bytes +Sync mode: Normal +Group commit: false (window 2ms) +Auto-checkpoint: false +Checkpoint threshold: 
0.8 +Vector dims: 128 +Vector count: 1,000 +Replication primary: false +Skip checkpoint: false +Reopen read-only: false +======================================================================================================================== + +[1/6] Building graph... + Creating nodes... + Created 5000 / 10000 nodes Created 10000 / 10000 nodes + Creating edges... + Created 5000 / 50000 edges Created 10000 / 50000 edges Created 15000 / 50000 edges Created 20000 / 50000 edges Created 25000 / 50000 edges Created 30000 / 50000 edges Created 35000 / 50000 edges Created 40000 / 50000 edges Created 45000 / 50000 edges Created 50000 / 50000 edges + Built in 130ms + +[2/6] Vector setup... + +--- Vector Operations --- +Set vectors (batch 100) p50= 120.96us p95= 293.79us p99= 293.79us max= 293.79us (6544 ops/sec) + +[3/6] Checkpointing... + Checkpointed in 123ms + +[4/6] Key lookup benchmarks... + +--- Key Lookups (node_by_key) --- +Random existing keys p50= 125ns p95= 292ns p99= 708ns max= 482.83us (5936848 ops/sec) + +[5/6] Traversal and edge benchmarks... + +--- 1-Hop Traversals (out) --- +Random nodes p50= 208ns p95= 292ns p99= 375ns max= 492.42us (4075762 ops/sec) + +--- Edge Exists --- +Random edge exists p50= 83ns p95= 125ns p99= 208ns max= 4.58us (11065129 ops/sec) +node_vector() random p50= 125ns p95= 208ns p99= 333ns max= 15.08us (8066595 ops/sec) +has_node_vector() random p50= 83ns p95= 84ns p99= 84ns max= 666ns (15050449 ops/sec) + +[6/6] Write benchmarks... 
+ +--- Batch Writes (100 nodes) --- +Batch of 100 nodes p50= 33.04us p95= 77.67us p99= 147.04us max= 147.04us (26273 ops/sec) + +--- Batch Writes (100 edges) --- +Batch of 100 edges p50= 37.79us p95= 95.62us p99= 104.17us max= 104.17us (23246 ops/sec) + +--- Batch Writes (100 edges + props) --- +Batch of 100 edges + props p50= 176.88us p95= 233.00us p99= 345.79us max= 345.79us (5406 ops/sec) diff --git a/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt7.txt b/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt7.txt new file mode 100644 index 0000000..1844e66 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-gate-baseline.attempt7.txt @@ -0,0 +1,60 @@ +======================================================================================================================== +Single-file Raw Benchmark (Rust) +======================================================================================================================== +Nodes: 10,000 +Edges: 50,000 +Edge types: 3 +Edge props: 10 +Iterations: 20,000 +WAL size: 67,108,864 bytes +Sync mode: Normal +Group commit: false (window 2ms) +Auto-checkpoint: false +Checkpoint threshold: 0.8 +Vector dims: 128 +Vector count: 1,000 +Replication primary: false +Skip checkpoint: false +Reopen read-only: false +======================================================================================================================== + +[1/6] Building graph... + Creating nodes... + Created 5000 / 10000 nodes Created 10000 / 10000 nodes + Creating edges... + Created 5000 / 50000 edges Created 10000 / 50000 edges Created 15000 / 50000 edges Created 20000 / 50000 edges Created 25000 / 50000 edges Created 30000 / 50000 edges Created 35000 / 50000 edges Created 40000 / 50000 edges Created 45000 / 50000 edges Created 50000 / 50000 edges + Built in 117ms + +[2/6] Vector setup... 
+ +--- Vector Operations --- +Set vectors (batch 100) p50= 115.83us p95= 253.46us p99= 253.46us max= 253.46us (6768 ops/sec) + +[3/6] Checkpointing... + Checkpointed in 126ms + +[4/6] Key lookup benchmarks... + +--- Key Lookups (node_by_key) --- +Random existing keys p50= 125ns p95= 250ns p99= 666ns max= 432.54us (6238292 ops/sec) + +[5/6] Traversal and edge benchmarks... + +--- 1-Hop Traversals (out) --- +Random nodes p50= 167ns p95= 292ns p99= 708ns max= 528.42us (4184319 ops/sec) + +--- Edge Exists --- +Random edge exists p50= 83ns p95= 125ns p99= 125ns max= 8.92us (11344267 ops/sec) +node_vector() random p50= 125ns p95= 208ns p99= 375ns max= 10.04us (8203143 ops/sec) +has_node_vector() random p50= 83ns p95= 84ns p99= 84ns max= 6.92us (14754888 ops/sec) + +[6/6] Write benchmarks... + +--- Batch Writes (100 nodes) --- +Batch of 100 nodes p50= 35.96us p95= 40.04us p99= 102.33us max= 102.33us (26810 ops/sec) + +--- Batch Writes (100 edges) --- +Batch of 100 edges p50= 43.54us p95= 99.21us p99= 160.83us max= 160.83us (19602 ops/sec) + +--- Batch Writes (100 edges + props) --- +Batch of 100 edges + props p50= 174.38us p95= 265.79us p99= 340.46us max= 340.46us (5387 ops/sec) diff --git a/docs/benchmarks/results/2026-02-08-replication-gate-baseline.txt b/docs/benchmarks/results/2026-02-08-replication-gate-baseline.txt new file mode 100644 index 0000000..8595963 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-gate-baseline.txt @@ -0,0 +1,60 @@ +======================================================================================================================== +Single-file Raw Benchmark (Rust) +======================================================================================================================== +Nodes: 10,000 +Edges: 50,000 +Edge types: 3 +Edge props: 10 +Iterations: 5,000 +WAL size: 67,108,864 bytes +Sync mode: Normal +Group commit: false (window 2ms) +Auto-checkpoint: false +Checkpoint threshold: 0.8 +Vector dims: 128 +Vector 
count: 1,000 +Replication primary: false +Skip checkpoint: false +Reopen read-only: false +======================================================================================================================== + +[1/6] Building graph... + Creating nodes... + Created 5000 / 10000 nodes Created 10000 / 10000 nodes + Creating edges... + Created 5000 / 50000 edges Created 10000 / 50000 edges Created 15000 / 50000 edges Created 20000 / 50000 edges Created 25000 / 50000 edges Created 30000 / 50000 edges Created 35000 / 50000 edges Created 40000 / 50000 edges Created 45000 / 50000 edges Created 50000 / 50000 edges + Built in 119ms + +[2/6] Vector setup... + +--- Vector Operations --- +Set vectors (batch 100) p50= 157.33us p95= 243.29us p99= 243.29us max= 243.29us (6109 ops/sec) + +[3/6] Checkpointing... + Checkpointed in 124ms + +[4/6] Key lookup benchmarks... + +--- Key Lookups (node_by_key) --- +Random existing keys p50= 125ns p95= 375ns p99= 583ns max= 439.12us (4247543 ops/sec) + +[5/6] Traversal and edge benchmarks... + +--- 1-Hop Traversals (out) --- +Random nodes p50= 208ns p95= 333ns p99= 459ns max= 516.17us (3109783 ops/sec) + +--- Edge Exists --- +Random edge exists p50= 84ns p95= 208ns p99= 334ns max= 14.50us (8622459 ops/sec) +node_vector() random p50= 125ns p95= 291ns p99= 417ns max= 1.92us (7523639 ops/sec) +has_node_vector() random p50= 42ns p95= 84ns p99= 84ns max= 541ns (16609309 ops/sec) + +[6/6] Write benchmarks... 
+ +--- Batch Writes (100 nodes) --- +Batch of 100 nodes p50= 32.38us p95= 46.29us p99= 93.12us max= 93.12us (29030 ops/sec) + +--- Batch Writes (100 edges) --- +Batch of 100 edges p50= 43.92us p95= 106.08us p99= 119.50us max= 119.50us (19805 ops/sec) + +--- Batch Writes (100 edges + props) --- +Batch of 100 edges + props p50= 172.38us p95= 241.29us p99= 331.21us max= 331.21us (5485 ops/sec) diff --git a/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt1.txt b/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt1.txt new file mode 100644 index 0000000..6ab8676 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt1.txt @@ -0,0 +1,56 @@ +======================================================================================================================== +Single-file Raw Benchmark (Rust) +======================================================================================================================== +Nodes: 10,000 +Edges: 0 +Edge types: 1 +Edge props: 0 +Iterations: 10,000 +WAL size: 67,108,864 bytes +Sync mode: Normal +Group commit: false (window 2ms) +Auto-checkpoint: false +Checkpoint threshold: 0.8 +Vector dims: 128 +Vector count: 0 +Replication primary: true +Replication segment max bytes: 1,073,741,824 +Skip checkpoint: false +Reopen read-only: false +======================================================================================================================== + +[1/6] Building graph... + Creating nodes... + Created 5000 / 10000 nodes Created 10000 / 10000 nodes + Creating edges... + + Built in 16ms + +[2/6] Vector setup... + +--- Vector Operations --- + Skipped (vector_count/vector_dims == 0) + +[3/6] Checkpointing... + Checkpointed in 13ms + +[4/6] Key lookup benchmarks... + +--- Key Lookups (node_by_key) --- +Random existing keys p50= 458ns p95= 1.04us p99= 1.42us max= 635.00us (1632921 ops/sec) + +[5/6] Traversal and edge benchmarks... 
+ +--- 1-Hop Traversals (out) --- +Random nodes p50= 83ns p95= 208ns p99= 416ns max= 59.17us (8297461 ops/sec) + +--- Edge Exists --- +Random edge exists p50= 125ns p95= 250ns p99= 375ns max= 543.46us (5374073 ops/sec) + +[6/6] Write benchmarks... + +--- Batch Writes (100 nodes) --- +Batch of 100 nodes p50= 95.50us p95= 272.29us p99= 583.54us max= 583.54us (8657 ops/sec) + +--- Batch Writes (100 edges) --- +Batch of 100 edges p50= 108.29us p95= 2.93ms p99= 4.38ms max= 4.38ms (2906 ops/sec) diff --git a/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt2.txt b/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt2.txt new file mode 100644 index 0000000..19be878 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt2.txt @@ -0,0 +1,56 @@ +======================================================================================================================== +Single-file Raw Benchmark (Rust) +======================================================================================================================== +Nodes: 10,000 +Edges: 0 +Edge types: 1 +Edge props: 0 +Iterations: 10,000 +WAL size: 67,108,864 bytes +Sync mode: Normal +Group commit: false (window 2ms) +Auto-checkpoint: false +Checkpoint threshold: 0.8 +Vector dims: 128 +Vector count: 0 +Replication primary: true +Replication segment max bytes: 1,073,741,824 +Skip checkpoint: false +Reopen read-only: false +======================================================================================================================== + +[1/6] Building graph... + Creating nodes... + Created 5000 / 10000 nodes Created 10000 / 10000 nodes + Creating edges... + + Built in 4ms + +[2/6] Vector setup... + +--- Vector Operations --- + Skipped (vector_count/vector_dims == 0) + +[3/6] Checkpointing... + Checkpointed in 6ms + +[4/6] Key lookup benchmarks... 
+ +--- Key Lookups (node_by_key) --- +Random existing keys p50= 125ns p95= 541ns p99= 750ns max= 529.29us (3782271 ops/sec) + +[5/6] Traversal and edge benchmarks... + +--- 1-Hop Traversals (out) --- +Random nodes p50= 83ns p95= 84ns p99= 84ns max= 53.08us (13404448 ops/sec) + +--- Edge Exists --- +Random edge exists p50= 42ns p95= 84ns p99= 84ns max= 13.96us (17329281 ops/sec) + +[6/6] Write benchmarks... + +--- Batch Writes (100 nodes) --- +Batch of 100 nodes p50= 36.42us p95= 49.25us p99= 179.38us max= 179.38us (24691 ops/sec) + +--- Batch Writes (100 edges) --- +Batch of 100 edges p50= 35.79us p95= 80.92us p99= 150.67us max= 150.67us (23942 ops/sec) diff --git a/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt3.txt b/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt3.txt new file mode 100644 index 0000000..ce9146e --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt3.txt @@ -0,0 +1,56 @@ +======================================================================================================================== +Single-file Raw Benchmark (Rust) +======================================================================================================================== +Nodes: 10,000 +Edges: 0 +Edge types: 1 +Edge props: 0 +Iterations: 10,000 +WAL size: 67,108,864 bytes +Sync mode: Normal +Group commit: false (window 2ms) +Auto-checkpoint: false +Checkpoint threshold: 0.8 +Vector dims: 128 +Vector count: 0 +Replication primary: true +Replication segment max bytes: 1,073,741,824 +Skip checkpoint: false +Reopen read-only: false +======================================================================================================================== + +[1/6] Building graph... + Creating nodes... + Created 5000 / 10000 nodes Created 10000 / 10000 nodes + Creating edges... + + Built in 4ms + +[2/6] Vector setup... 
+ +--- Vector Operations --- + Skipped (vector_count/vector_dims == 0) + +[3/6] Checkpointing... + Checkpointed in 5ms + +[4/6] Key lookup benchmarks... + +--- Key Lookups (node_by_key) --- +Random existing keys p50= 84ns p95= 209ns p99= 417ns max= 426.21us (6199347 ops/sec) + +[5/6] Traversal and edge benchmarks... + +--- 1-Hop Traversals (out) --- +Random nodes p50= 83ns p95= 84ns p99= 250ns max= 43.38us (11767032 ops/sec) + +--- Edge Exists --- +Random edge exists p50= 42ns p95= 84ns p99= 84ns max= 24.83us (16610799 ops/sec) + +[6/6] Write benchmarks... + +--- Batch Writes (100 nodes) --- +Batch of 100 nodes p50= 35.08us p95= 47.29us p99= 121.75us max= 121.75us (26827 ops/sec) + +--- Batch Writes (100 edges) --- +Batch of 100 edges p50= 35.75us p95= 91.71us p99= 145.46us max= 145.46us (23991 ops/sec) diff --git a/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt4.txt b/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt4.txt new file mode 100644 index 0000000..d545521 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt4.txt @@ -0,0 +1,60 @@ +======================================================================================================================== +Single-file Raw Benchmark (Rust) +======================================================================================================================== +Nodes: 10,000 +Edges: 50,000 +Edge types: 3 +Edge props: 10 +Iterations: 20,000 +WAL size: 67,108,864 bytes +Sync mode: Normal +Group commit: false (window 2ms) +Auto-checkpoint: false +Checkpoint threshold: 0.8 +Vector dims: 128 +Vector count: 1,000 +Replication primary: true +Skip checkpoint: false +Reopen read-only: false +======================================================================================================================== + +[1/6] Building graph... + Creating nodes... + Created 5000 / 10000 nodes Created 10000 / 10000 nodes + Creating edges... 
+ Created 5000 / 50000 edges Created 10000 / 50000 edges Created 15000 / 50000 edges Created 20000 / 50000 edges Created 25000 / 50000 edges Created 30000 / 50000 edges Created 35000 / 50000 edges Created 40000 / 50000 edges Created 45000 / 50000 edges Created 50000 / 50000 edges + Built in 127ms + +[2/6] Vector setup... + +--- Vector Operations --- +Set vectors (batch 100) p50= 135.00us p95= 262.75us p99= 262.75us max= 262.75us (6874 ops/sec) + +[3/6] Checkpointing... + Checkpointed in 133ms + +[4/6] Key lookup benchmarks... + +--- Key Lookups (node_by_key) --- +Random existing keys p50= 84ns p95= 167ns p99= 375ns max= 411.25us (7478274 ops/sec) + +[5/6] Traversal and edge benchmarks... + +--- 1-Hop Traversals (out) --- +Random nodes p50= 208ns p95= 291ns p99= 375ns max= 485.38us (4373016 ops/sec) + +--- Edge Exists --- +Random edge exists p50= 83ns p95= 125ns p99= 125ns max= 5.88us (11103068 ops/sec) +node_vector() random p50= 125ns p95= 208ns p99= 333ns max= 24.29us (7974908 ops/sec) +has_node_vector() random p50= 83ns p95= 84ns p99= 125ns max= 24.88us (13975557 ops/sec) + +[6/6] Write benchmarks... 
+ +--- Batch Writes (100 nodes) --- +Batch of 100 nodes p50= 30.62us p95= 37.04us p99= 99.62us max= 99.62us (30638 ops/sec) + +--- Batch Writes (100 edges) --- +Batch of 100 edges p50= 37.71us p95= 48.12us p99= 89.12us max= 89.12us (25387 ops/sec) + +--- Batch Writes (100 edges + props) --- +Batch of 100 edges + props p50= 171.38us p95= 199.25us p99= 256.92us max= 256.92us (5732 ops/sec) diff --git a/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt5.txt b/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt5.txt new file mode 100644 index 0000000..4f67217 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt5.txt @@ -0,0 +1,60 @@ +======================================================================================================================== +Single-file Raw Benchmark (Rust) +======================================================================================================================== +Nodes: 10,000 +Edges: 50,000 +Edge types: 3 +Edge props: 10 +Iterations: 20,000 +WAL size: 67,108,864 bytes +Sync mode: Normal +Group commit: false (window 2ms) +Auto-checkpoint: false +Checkpoint threshold: 0.8 +Vector dims: 128 +Vector count: 1,000 +Replication primary: true +Skip checkpoint: false +Reopen read-only: false +======================================================================================================================== + +[1/6] Building graph... + Creating nodes... + Created 5000 / 10000 nodes Created 10000 / 10000 nodes + Creating edges... + Created 5000 / 50000 edges Created 10000 / 50000 edges Created 15000 / 50000 edges Created 20000 / 50000 edges Created 25000 / 50000 edges Created 30000 / 50000 edges Created 35000 / 50000 edges Created 40000 / 50000 edges Created 45000 / 50000 edges Created 50000 / 50000 edges + Built in 129ms + +[2/6] Vector setup... 
+ +--- Vector Operations --- +Set vectors (batch 100) p50= 131.62us p95= 286.71us p99= 286.71us max= 286.71us (6417 ops/sec) + +[3/6] Checkpointing... + Checkpointed in 128ms + +[4/6] Key lookup benchmarks... + +--- Key Lookups (node_by_key) --- +Random existing keys p50= 125ns p95= 167ns p99= 333ns max= 402.00us (7290588 ops/sec) + +[5/6] Traversal and edge benchmarks... + +--- 1-Hop Traversals (out) --- +Random nodes p50= 208ns p95= 333ns p99= 417ns max= 513.04us (4065384 ops/sec) + +--- Edge Exists --- +Random edge exists p50= 83ns p95= 250ns p99= 334ns max= 25.50us (8540246 ops/sec) +node_vector() random p50= 125ns p95= 166ns p99= 292ns max= 21.62us (8367490 ops/sec) +has_node_vector() random p50= 83ns p95= 84ns p99= 84ns max= 4.21us (15130012 ops/sec) + +[6/6] Write benchmarks... + +--- Batch Writes (100 nodes) --- +Batch of 100 nodes p50= 38.08us p95= 49.92us p99= 80.38us max= 80.38us (25159 ops/sec) + +--- Batch Writes (100 edges) --- +Batch of 100 edges p50= 39.46us p95= 48.04us p99= 101.38us max= 101.38us (24364 ops/sec) + +--- Batch Writes (100 edges + props) --- +Batch of 100 edges + props p50= 176.88us p95= 279.96us p99= 307.58us max= 307.58us (5250 ops/sec) diff --git a/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt6.txt b/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt6.txt new file mode 100644 index 0000000..cabd2bb --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt6.txt @@ -0,0 +1,60 @@ +======================================================================================================================== +Single-file Raw Benchmark (Rust) +======================================================================================================================== +Nodes: 10,000 +Edges: 50,000 +Edge types: 3 +Edge props: 10 +Iterations: 20,000 +WAL size: 67,108,864 bytes +Sync mode: Normal +Group commit: false (window 2ms) +Auto-checkpoint: false +Checkpoint threshold: 0.8 
+Vector dims: 128 +Vector count: 1,000 +Replication primary: true +Skip checkpoint: false +Reopen read-only: false +======================================================================================================================== + +[1/6] Building graph... + Creating nodes... + Created 5000 / 10000 nodes Created 10000 / 10000 nodes + Creating edges... + Created 5000 / 50000 edges Created 10000 / 50000 edges Created 15000 / 50000 edges Created 20000 / 50000 edges Created 25000 / 50000 edges Created 30000 / 50000 edges Created 35000 / 50000 edges Created 40000 / 50000 edges Created 45000 / 50000 edges Created 50000 / 50000 edges + Built in 127ms + +[2/6] Vector setup... + +--- Vector Operations --- +Set vectors (batch 100) p50= 221.50us p95= 412.92us p99= 412.92us max= 412.92us (4498 ops/sec) + +[3/6] Checkpointing... + Checkpointed in 134ms + +[4/6] Key lookup benchmarks... + +--- Key Lookups (node_by_key) --- +Random existing keys p50= 125ns p95= 167ns p99= 292ns max= 406.54us (7468480 ops/sec) + +[5/6] Traversal and edge benchmarks... + +--- 1-Hop Traversals (out) --- +Random nodes p50= 208ns p95= 292ns p99= 625ns max= 546.29us (3987081 ops/sec) + +--- Edge Exists --- +Random edge exists p50= 83ns p95= 125ns p99= 125ns max= 5.33us (11042830 ops/sec) +node_vector() random p50= 125ns p95= 125ns p99= 250ns max= 2.38us (8773080 ops/sec) +has_node_vector() random p50= 83ns p95= 125ns p99= 167ns max= 24.25us (12692449 ops/sec) + +[6/6] Write benchmarks... 
+ +--- Batch Writes (100 nodes) --- +Batch of 100 nodes p50= 36.38us p95= 54.67us p99= 92.71us max= 92.71us (25672 ops/sec) + +--- Batch Writes (100 edges) --- +Batch of 100 edges p50= 42.67us p95= 51.96us p99= 93.46us max= 93.46us (22770 ops/sec) + +--- Batch Writes (100 edges + props) --- +Batch of 100 edges + props p50= 172.54us p95= 230.75us p99= 267.33us max= 267.33us (5620 ops/sec) diff --git a/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt7.txt b/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt7.txt new file mode 100644 index 0000000..e324238 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-gate-primary.attempt7.txt @@ -0,0 +1,60 @@ +======================================================================================================================== +Single-file Raw Benchmark (Rust) +======================================================================================================================== +Nodes: 10,000 +Edges: 50,000 +Edge types: 3 +Edge props: 10 +Iterations: 20,000 +WAL size: 67,108,864 bytes +Sync mode: Normal +Group commit: false (window 2ms) +Auto-checkpoint: false +Checkpoint threshold: 0.8 +Vector dims: 128 +Vector count: 1,000 +Replication primary: true +Skip checkpoint: false +Reopen read-only: false +======================================================================================================================== + +[1/6] Building graph... + Creating nodes... + Created 5000 / 10000 nodes Created 10000 / 10000 nodes + Creating edges... + Created 5000 / 50000 edges Created 10000 / 50000 edges Created 15000 / 50000 edges Created 20000 / 50000 edges Created 25000 / 50000 edges Created 30000 / 50000 edges Created 35000 / 50000 edges Created 40000 / 50000 edges Created 45000 / 50000 edges Created 50000 / 50000 edges + Built in 203ms + +[2/6] Vector setup... 
+ +--- Vector Operations --- +Set vectors (batch 100) p50= 269.17us p95= 3.37ms p99= 3.37ms max= 3.37ms (1704 ops/sec) + +[3/6] Checkpointing... + Checkpointed in 189ms + +[4/6] Key lookup benchmarks... + +--- Key Lookups (node_by_key) --- +Random existing keys p50= 125ns p95= 542ns p99= 792ns max= 800.79us (4009701 ops/sec) + +[5/6] Traversal and edge benchmarks... + +--- 1-Hop Traversals (out) --- +Random nodes p50= 250ns p95= 708ns p99= 917ns max= 582.25us (2486676 ops/sec) + +--- Edge Exists --- +Random edge exists p50= 125ns p95= 250ns p99= 500ns max= 29.92us (7134477 ops/sec) +node_vector() random p50= 167ns p95= 334ns p99= 459ns max= 83.00us (4755085 ops/sec) +has_node_vector() random p50= 125ns p95= 125ns p99= 208ns max= 171.67us (7404174 ops/sec) + +[6/6] Write benchmarks... + +--- Batch Writes (100 nodes) --- +Batch of 100 nodes p50= 66.33us p95= 132.29us p99= 354.42us max= 354.42us (13031 ops/sec) + +--- Batch Writes (100 edges) --- +Batch of 100 edges p50= 72.42us p95= 155.00us p99= 251.67us max= 251.67us (12988 ops/sec) + +--- Batch Writes (100 edges + props) --- +Batch of 100 edges + props p50= 318.71us p95= 529.46us p99= 649.88us max= 649.88us (3047 ops/sec) diff --git a/docs/benchmarks/results/2026-02-08-replication-gate-primary.txt b/docs/benchmarks/results/2026-02-08-replication-gate-primary.txt new file mode 100644 index 0000000..0e268e7 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-gate-primary.txt @@ -0,0 +1,60 @@ +======================================================================================================================== +Single-file Raw Benchmark (Rust) +======================================================================================================================== +Nodes: 10,000 +Edges: 50,000 +Edge types: 3 +Edge props: 10 +Iterations: 5,000 +WAL size: 67,108,864 bytes +Sync mode: Normal +Group commit: false (window 2ms) +Auto-checkpoint: false +Checkpoint threshold: 0.8 +Vector dims: 128 +Vector 
count: 1,000 +Replication primary: true +Skip checkpoint: false +Reopen read-only: false +======================================================================================================================== + +[1/6] Building graph... + Creating nodes... + Created 5000 / 10000 nodes Created 10000 / 10000 nodes + Creating edges... + Created 5000 / 50000 edges Created 10000 / 50000 edges Created 15000 / 50000 edges Created 20000 / 50000 edges Created 25000 / 50000 edges Created 30000 / 50000 edges Created 35000 / 50000 edges Created 40000 / 50000 edges Created 45000 / 50000 edges Created 50000 / 50000 edges + Built in 115ms + +[2/6] Vector setup... + +--- Vector Operations --- +Set vectors (batch 100) p50= 146.83us p95= 238.42us p99= 238.42us max= 238.42us (6490 ops/sec) + +[3/6] Checkpointing... + Checkpointed in 125ms + +[4/6] Key lookup benchmarks... + +--- Key Lookups (node_by_key) --- +Random existing keys p50= 84ns p95= 333ns p99= 459ns max= 447.21us (4546260 ops/sec) + +[5/6] Traversal and edge benchmarks... + +--- 1-Hop Traversals (out) --- +Random nodes p50= 208ns p95= 334ns p99= 709ns max= 527.21us (3068479 ops/sec) + +--- Edge Exists --- +Random edge exists p50= 84ns p95= 167ns p99= 292ns max= 11.38us (9021249 ops/sec) +node_vector() random p50= 125ns p95= 250ns p99= 375ns max= 1.46us (7641054 ops/sec) +has_node_vector() random p50= 42ns p95= 84ns p99= 84ns max= 334ns (16867161 ops/sec) + +[6/6] Write benchmarks... 
+ +--- Batch Writes (100 nodes) --- +Batch of 100 nodes p50= 35.17us p95= 55.12us p99= 106.67us max= 106.67us (25776 ops/sec) + +--- Batch Writes (100 edges) --- +Batch of 100 edges p50= 40.79us p95= 56.33us p99= 103.50us max= 103.50us (23632 ops/sec) + +--- Batch Writes (100 edges + props) --- +Batch of 100 edges + props p50= 173.88us p95= 285.83us p99= 310.67us max= 310.67us (5425 ops/sec) diff --git a/docs/benchmarks/results/2026-02-08-replication-soak-gate.txt b/docs/benchmarks/results/2026-02-08-replication-soak-gate.txt new file mode 100644 index 0000000..d34c32a --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-replication-soak-gate.txt @@ -0,0 +1,26 @@ +replication_soak_bench +sync_mode: normal +replicas: 5 +cycles: 6 +commits_per_cycle: 40 +active_replicas_per_cycle: 3 +churn_interval: 2 +promotion_interval: 3 +reseed_check_interval: 2 +max_frames: 128 +recovery_max_loops: 80 +progress_cycle: 1 primary_epoch: 1 primary_head_log_index: 40 reseeds: 0 promotions: 0 +progress_cycle: 4 primary_epoch: 2 primary_head_log_index: 160 reseeds: 0 promotions: 1 +progress_cycle: 6 primary_epoch: 2 primary_head_log_index: 240 reseeds: 0 promotions: 2 +writes_committed: 240 +promotion_count: 2 +stale_fence_rejections: 2 +reseed_count: 0 +reseed_recovery_successes: 0 +max_recovery_loops: 1 +max_observed_lag: 40 +divergence_violations: 0 +final_primary_epoch: 3 +final_primary_head_log_index: 240 +final_primary_nodes: 240 +elapsed_ms: 3125.407 diff --git a/docs/benchmarks/results/2026-02-08-vector-ann-gate.attempt1.txt b/docs/benchmarks/results/2026-02-08-vector-ann-gate.attempt1.txt new file mode 100644 index 0000000..6f16be6 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-vector-ann-gate.attempt1.txt @@ -0,0 +1,14 @@ +algorithm: ivf_pq +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 +n_clusters: 141 +n_probe: 16 +pq_subspaces: 48 +pq_centroids: 256 +residuals: false +build_elapsed_ms: 10433.000 +search_p50_ms: 0.933375 +search_p95_ms: 2.346209 
+mean_recall_at_k: 0.172000 diff --git a/docs/benchmarks/results/2026-02-08-vector-ann-gate.attempt2.txt b/docs/benchmarks/results/2026-02-08-vector-ann-gate.attempt2.txt new file mode 100644 index 0000000..bf151e5 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-vector-ann-gate.attempt2.txt @@ -0,0 +1,14 @@ +algorithm: ivf_pq +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 +n_clusters: 141 +n_probe: 16 +pq_subspaces: 48 +pq_centroids: 256 +residuals: false +build_elapsed_ms: 20894.000 +search_p50_ms: 1.061125 +search_p95_ms: 2.201875 +mean_recall_at_k: 0.177500 diff --git a/docs/benchmarks/results/2026-02-08-vector-ann-gate.attempt3.txt b/docs/benchmarks/results/2026-02-08-vector-ann-gate.attempt3.txt new file mode 100644 index 0000000..848fc7d --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-vector-ann-gate.attempt3.txt @@ -0,0 +1,14 @@ +algorithm: ivf_pq +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 +n_clusters: 141 +n_probe: 16 +pq_subspaces: 48 +pq_centroids: 256 +residuals: false +build_elapsed_ms: 19745.000 +search_p50_ms: 0.573042 +search_p95_ms: 1.283417 +mean_recall_at_k: 0.187000 diff --git a/docs/benchmarks/results/2026-02-08-vector-ann-matrix.csv b/docs/benchmarks/results/2026-02-08-vector-ann-matrix.csv new file mode 100644 index 0000000..55dd01f --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-vector-ann-matrix.csv @@ -0,0 +1,7 @@ +algorithm,residuals,n_probe,build_elapsed_ms,search_p50_ms,search_p95_ms,mean_recall_at_k +ivf,na,8,1453.000,0.469542,0.766000,0.166000 +ivf_pq,true,8,9325.000,0.475416,0.901458,0.107500 +ivf_pq,false,8,8790.000,0.291292,0.450750,0.119500 +ivf,na,16,3970.000,2.434625,4.027167,0.290500 +ivf_pq,true,16,20721.000,2.350041,5.996167,0.172500 +ivf_pq,false,16,21072.000,0.824291,1.399292,0.177500 diff --git a/docs/benchmarks/results/2026-02-08-vector-ann-matrix.txt b/docs/benchmarks/results/2026-02-08-vector-ann-matrix.txt new file mode 100644 index 0000000..1f0934f --- /dev/null +++ 
b/docs/benchmarks/results/2026-02-08-vector-ann-matrix.txt @@ -0,0 +1,107 @@ +Vector ANN matrix benchmark +date=2026-02-08 +vectors=20000 dimensions=384 queries=200 k=10 +n_probes={8 16} +pq_subspaces=48 pq_centroids=256 +seed=42 + +RUN algorithm=ivf residuals=na n_probe=8 +algorithm: ivf +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 +n_clusters: 141 +n_probe: 8 +build_elapsed_ms: 1453.000 +search_p50_ms: 0.469542 +search_p95_ms: 0.766000 +mean_recall_at_k: 0.166000 + +RUN algorithm=ivf_pq residuals=true n_probe=8 +algorithm: ivf_pq +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 +n_clusters: 141 +n_probe: 8 +pq_subspaces: 48 +pq_centroids: 256 +residuals: true +build_elapsed_ms: 9325.000 +search_p50_ms: 0.475416 +search_p95_ms: 0.901458 +mean_recall_at_k: 0.107500 + +RUN algorithm=ivf_pq residuals=false n_probe=8 +algorithm: ivf_pq +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 +n_clusters: 141 +n_probe: 8 +pq_subspaces: 48 +pq_centroids: 256 +residuals: false +build_elapsed_ms: 8790.000 +search_p50_ms: 0.291292 +search_p95_ms: 0.450750 +mean_recall_at_k: 0.119500 + +RUN algorithm=ivf residuals=na n_probe=16 +algorithm: ivf +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 +n_clusters: 141 +n_probe: 16 +build_elapsed_ms: 3970.000 +search_p50_ms: 2.434625 +search_p95_ms: 4.027167 +mean_recall_at_k: 0.290500 + +RUN algorithm=ivf_pq residuals=true n_probe=16 +algorithm: ivf_pq +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 +n_clusters: 141 +n_probe: 16 +pq_subspaces: 48 +pq_centroids: 256 +residuals: true +build_elapsed_ms: 20721.000 +search_p50_ms: 2.350041 +search_p95_ms: 5.996167 +mean_recall_at_k: 0.172500 + +RUN algorithm=ivf_pq residuals=false n_probe=16 +algorithm: ivf_pq +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 +n_clusters: 141 +n_probe: 16 +pq_subspaces: 48 +pq_centroids: 256 +residuals: false +build_elapsed_ms: 21072.000 +search_p50_ms: 0.824291 +search_p95_ms: 1.399292 +mean_recall_at_k: 0.177500 + 
+raw_output=/Users/mask/code/raydb/ray-rs/../docs/benchmarks/results/2026-02-08-vector-ann-matrix.txt +csv_output=/Users/mask/code/raydb/ray-rs/../docs/benchmarks/results/2026-02-08-vector-ann-matrix.csv +SUMMARY (best recall then p95 latency): +algorithm,residuals,n_probe,build_elapsed_ms,search_p50_ms,search_p95_ms,mean_recall_at_k +ivf,na,16,3970.000,2.434625,4.027167,0.290500 +ivf_pq,false,16,21072.000,0.824291,1.399292,0.177500 +ivf_pq,true,16,20721.000,2.350041,5.996167,0.172500 +ivf,na,8,1453.000,0.469542,0.766000,0.166000 +ivf_pq,false,8,8790.000,0.291292,0.450750,0.119500 +ivf_pq,true,8,9325.000,0.475416,0.901458,0.107500 diff --git a/docs/benchmarks/results/2026-02-08-vector-ann-pq-tuning.csv b/docs/benchmarks/results/2026-02-08-vector-ann-pq-tuning.csv new file mode 100644 index 0000000..c022562 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-vector-ann-pq-tuning.csv @@ -0,0 +1,11 @@ +algorithm,residuals,n_probe,pq_subspaces,pq_centroids,build_elapsed_ms,search_p50_ms,search_p95_ms,mean_recall_at_k,recall_ratio_vs_ivf,p95_ratio_vs_ivf +ivf,na,8,na,na,1346.000,0.412833,0.526833,0.176000,1.000000,1.000000 +ivf,na,16,na,na,1282.000,0.819666,1.155416,0.269000,1.000000,1.000000 +ivf_pq,false,8,24,128,3887.000,0.181000,0.224042,0.060000,0.340909,0.425262 +ivf_pq,false,8,24,256,6845.000,0.197792,0.293750,0.067000,0.380682,0.557577 +ivf_pq,false,8,48,128,4726.000,0.263958,0.405166,0.114500,0.650568,0.769060 +ivf_pq,false,8,48,256,6812.000,0.265625,0.324250,0.121000,0.687500,0.615470 +ivf_pq,false,16,24,128,3839.000,0.270292,0.333959,0.068500,0.254647,0.289038 +ivf_pq,false,16,24,256,6955.000,0.286875,0.435375,0.082500,0.306691,0.376812 +ivf_pq,false,16,48,128,3889.000,0.385292,0.566083,0.159000,0.591078,0.489939 +ivf_pq,false,16,48,256,6221.000,0.400833,0.535458,0.178500,0.663569,0.463433 diff --git a/docs/benchmarks/results/2026-02-08-vector-ann-pq-tuning.txt b/docs/benchmarks/results/2026-02-08-vector-ann-pq-tuning.txt new file mode 100644 index 
0000000..62ea2e3 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-vector-ann-pq-tuning.txt @@ -0,0 +1,175 @@ +Vector ANN PQ tuning benchmark +date=2026-02-08 +vectors=20000 dimensions=384 queries=200 k=10 +n_probes={8 16} +pq_subspaces_set={24 48} +pq_centroids_set={128 256} +residuals_set={false} +seed=42 + +RUN baseline algorithm=ivf n_probe=8 +algorithm: ivf +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 +n_clusters: 141 +n_probe: 8 +build_elapsed_ms: 1346.000 +search_p50_ms: 0.412833 +search_p95_ms: 0.526833 +mean_recall_at_k: 0.176000 + +RUN baseline algorithm=ivf n_probe=16 +algorithm: ivf +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 +n_clusters: 141 +n_probe: 16 +build_elapsed_ms: 1282.000 +search_p50_ms: 0.819666 +search_p95_ms: 1.155416 +mean_recall_at_k: 0.269000 + +RUN algorithm=ivf_pq residuals=false n_probe=8 pq_subspaces=24 pq_centroids=128 +algorithm: ivf_pq +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 +n_clusters: 141 +n_probe: 8 +pq_subspaces: 24 +pq_centroids: 128 +residuals: false +build_elapsed_ms: 3887.000 +search_p50_ms: 0.181000 +search_p95_ms: 0.224042 +mean_recall_at_k: 0.060000 + +RUN algorithm=ivf_pq residuals=false n_probe=8 pq_subspaces=24 pq_centroids=256 +algorithm: ivf_pq +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 +n_clusters: 141 +n_probe: 8 +pq_subspaces: 24 +pq_centroids: 256 +residuals: false +build_elapsed_ms: 6845.000 +search_p50_ms: 0.197792 +search_p95_ms: 0.293750 +mean_recall_at_k: 0.067000 + +RUN algorithm=ivf_pq residuals=false n_probe=8 pq_subspaces=48 pq_centroids=128 +algorithm: ivf_pq +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 +n_clusters: 141 +n_probe: 8 +pq_subspaces: 48 +pq_centroids: 128 +residuals: false +build_elapsed_ms: 4726.000 +search_p50_ms: 0.263958 +search_p95_ms: 0.405166 +mean_recall_at_k: 0.114500 + +RUN algorithm=ivf_pq residuals=false n_probe=8 pq_subspaces=48 pq_centroids=256 +algorithm: ivf_pq +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 
+n_clusters: 141 +n_probe: 8 +pq_subspaces: 48 +pq_centroids: 256 +residuals: false +build_elapsed_ms: 6812.000 +search_p50_ms: 0.265625 +search_p95_ms: 0.324250 +mean_recall_at_k: 0.121000 + +RUN algorithm=ivf_pq residuals=false n_probe=16 pq_subspaces=24 pq_centroids=128 +algorithm: ivf_pq +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 +n_clusters: 141 +n_probe: 16 +pq_subspaces: 24 +pq_centroids: 128 +residuals: false +build_elapsed_ms: 3839.000 +search_p50_ms: 0.270292 +search_p95_ms: 0.333959 +mean_recall_at_k: 0.068500 + +RUN algorithm=ivf_pq residuals=false n_probe=16 pq_subspaces=24 pq_centroids=256 +algorithm: ivf_pq +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 +n_clusters: 141 +n_probe: 16 +pq_subspaces: 24 +pq_centroids: 256 +residuals: false +build_elapsed_ms: 6955.000 +search_p50_ms: 0.286875 +search_p95_ms: 0.435375 +mean_recall_at_k: 0.082500 + +RUN algorithm=ivf_pq residuals=false n_probe=16 pq_subspaces=48 pq_centroids=128 +algorithm: ivf_pq +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 +n_clusters: 141 +n_probe: 16 +pq_subspaces: 48 +pq_centroids: 128 +residuals: false +build_elapsed_ms: 3889.000 +search_p50_ms: 0.385292 +search_p95_ms: 0.566083 +mean_recall_at_k: 0.159000 + +RUN algorithm=ivf_pq residuals=false n_probe=16 pq_subspaces=48 pq_centroids=256 +algorithm: ivf_pq +vectors: 20000 +dimensions: 384 +queries: 200 +k: 10 +n_clusters: 141 +n_probe: 16 +pq_subspaces: 48 +pq_centroids: 256 +residuals: false +build_elapsed_ms: 6221.000 +search_p50_ms: 0.400833 +search_p95_ms: 0.535458 +mean_recall_at_k: 0.178500 + +raw_output=/Users/mask/code/raydb/ray-rs/../docs/benchmarks/results/2026-02-08-vector-ann-pq-tuning.txt +csv_output=/Users/mask/code/raydb/ray-rs/../docs/benchmarks/results/2026-02-08-vector-ann-pq-tuning.csv +SUMMARY (best PQ configs by recall_ratio, then p95_ratio): 
+algorithm,residuals,n_probe,pq_subspaces,pq_centroids,build_elapsed_ms,search_p50_ms,search_p95_ms,mean_recall_at_k,recall_ratio_vs_ivf,p95_ratio_vs_ivf +ivf_pq,false,8,48,256,6812.000,0.265625,0.324250,0.121000,0.687500,0.615470 +ivf_pq,false,8,48,128,4726.000,0.263958,0.405166,0.114500,0.650568,0.769060 +ivf_pq,false,8,24,256,6845.000,0.197792,0.293750,0.067000,0.380682,0.557577 +ivf_pq,false,8,24,128,3887.000,0.181000,0.224042,0.060000,0.340909,0.425262 +ivf_pq,false,16,48,256,6221.000,0.400833,0.535458,0.178500,0.663569,0.463433 +ivf_pq,false,16,48,128,3889.000,0.385292,0.566083,0.159000,0.591078,0.489939 +ivf_pq,false,16,24,256,6955.000,0.286875,0.435375,0.082500,0.306691,0.376812 +ivf_pq,false,16,24,128,3839.000,0.270292,0.333959,0.068500,0.254647,0.289038 diff --git a/docs/benchmarks/results/2026-02-08-vector-compaction-matrix.csv b/docs/benchmarks/results/2026-02-08-vector-compaction-matrix.csv new file mode 100644 index 0000000..8f3f652 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-vector-compaction-matrix.csv @@ -0,0 +1,19 @@ +delete_ratio,min_deletion_ratio,max_fragments,min_vectors_to_compact,compaction_performed,compaction_elapsed_ms,bytes_before,bytes_after,reclaim_percent,fragments_before,fragments_after +0.35,0.20,2,10000,true,9.61,76806280,71230880,7.26,10,8 +0.35,0.20,4,10000,true,14.94,76806280,65807544,14.32,10,6 +0.35,0.20,8,10000,true,41.66,76806280,55163624,28.18,10,2 +0.35,0.30,2,10000,true,10.07,76806280,71230880,7.26,10,8 +0.35,0.30,4,10000,true,14.21,76806280,65807544,14.32,10,6 +0.35,0.30,8,10000,true,22.29,76806280,55163624,28.18,10,2 +0.35,0.40,2,10000,false,0.00,76806280,76806280,0.00,10,10 +0.35,0.40,4,10000,false,0.00,76806280,76806280,0.00,10,10 +0.35,0.40,8,10000,false,0.00,76806280,76806280,0.00,10,10 +0.55,0.20,2,10000,true,2.91,76806280,68206496,11.20,10,8 +0.55,0.20,4,10000,true,4.86,76806280,59726520,22.24,10,6 +0.55,0.20,8,10000,true,13.85,76806280,42871016,44.18,10,2 
+0.55,0.30,2,10000,true,2.99,76806280,68206496,11.20,10,8 +0.55,0.30,4,10000,true,4.33,76806280,59726520,22.24,10,6 +0.55,0.30,8,10000,true,8.68,76806280,42871016,44.18,10,2 +0.55,0.40,2,10000,true,1.86,76806280,68206496,11.20,10,8 +0.55,0.40,4,10000,true,5.39,76806280,59726520,22.24,10,6 +0.55,0.40,8,10000,true,8.83,76806280,42871016,44.18,10,2 diff --git a/docs/benchmarks/results/2026-02-08-vector-compaction-matrix.txt b/docs/benchmarks/results/2026-02-08-vector-compaction-matrix.txt new file mode 100644 index 0000000..d183791 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-vector-compaction-matrix.txt @@ -0,0 +1,579 @@ +Vector compaction matrix benchmark +date=2026-02-08 +vectors=50000 dimensions=384 fragment_target_size=5000 +delete_ratios={0.35 0.55} +min_deletion_ratios={0.20 0.30 0.40} +max_fragments_set={2 4 8} +min_vectors_to_compact=10000 + +RUN delete_ratio=0.35 min_del=0.20 max_frag=2 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 35.00% +strategy: min_deletion_ratio=0.2, max_fragments=2, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 480.66 +insert_throughput_vectors_per_sec: 104,024 +delete_elapsed_ms: 13.53 +deleted_vectors: 17,500 (requested 17,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 9.61 +compaction_performed: true +candidate_fragments_before: 2 ([8, 2]) + +Store stats (before -> after): + live_vectors: 32,500 -> 32,500 + total_deleted: 17,500 -> 13,871 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 71,230,880 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 8 + total_deleted_vectors: 
17,500 -> 13,871 + average_deletion_ratio: 35.00% -> 29.91% + +RUN delete_ratio=0.35 min_del=0.20 max_frag=4 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 35.00% +strategy: min_deletion_ratio=0.2, max_fragments=4, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 189.36 +insert_throughput_vectors_per_sec: 264,050 +delete_elapsed_ms: 8.23 +deleted_vectors: 17,500 (requested 17,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 14.94 +compaction_performed: true +candidate_fragments_before: 4 ([8, 2, 0, 9]) + +Store stats (before -> after): + live_vectors: 32,500 -> 32,500 + total_deleted: 17,500 -> 10,341 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 65,807,544 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 6 + total_deleted_vectors: 17,500 -> 10,341 + average_deletion_ratio: 35.00% -> 24.14% + +RUN delete_ratio=0.35 min_del=0.20 max_frag=8 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 35.00% +strategy: min_deletion_ratio=0.2, max_fragments=8, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 260.49 +insert_throughput_vectors_per_sec: 191,946 +delete_elapsed_ms: 13.79 +deleted_vectors: 17,500 (requested 17,500) +clear_deleted_elapsed_ms: 0.00 
+cleared_fragments: 0 +compaction_elapsed_ms: 41.66 +compaction_performed: true +candidate_fragments_before: 8 ([8, 2, 0, 9, 6, 4, 7, 3]) + +Store stats (before -> after): + live_vectors: 32,500 -> 32,500 + total_deleted: 17,500 -> 3,413 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 55,163,624 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 2 + total_deleted_vectors: 17,500 -> 3,413 + average_deletion_ratio: 35.00% -> 9.50% + +RUN delete_ratio=0.35 min_del=0.30 max_frag=2 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 35.00% +strategy: min_deletion_ratio=0.3, max_fragments=2, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 227.67 +insert_throughput_vectors_per_sec: 219,621 +delete_elapsed_ms: 10.32 +deleted_vectors: 17,500 (requested 17,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 10.07 +compaction_performed: true +candidate_fragments_before: 2 ([8, 2]) + +Store stats (before -> after): + live_vectors: 32,500 -> 32,500 + total_deleted: 17,500 -> 13,871 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 71,230,880 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 8 + total_deleted_vectors: 17,500 -> 13,871 + average_deletion_ratio: 35.00% -> 29.91% + +RUN delete_ratio=0.35 min_del=0.30 max_frag=4 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 
+fragment_target_size: 5,000 +delete_ratio: 35.00% +strategy: min_deletion_ratio=0.3, max_fragments=4, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 208.88 +insert_throughput_vectors_per_sec: 239,369 +delete_elapsed_ms: 6.98 +deleted_vectors: 17,500 (requested 17,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 14.21 +compaction_performed: true +candidate_fragments_before: 4 ([8, 2, 0, 9]) + +Store stats (before -> after): + live_vectors: 32,500 -> 32,500 + total_deleted: 17,500 -> 10,341 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 65,807,544 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 6 + total_deleted_vectors: 17,500 -> 10,341 + average_deletion_ratio: 35.00% -> 24.14% + +RUN delete_ratio=0.35 min_del=0.30 max_frag=8 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 35.00% +strategy: min_deletion_ratio=0.3, max_fragments=8, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 204.27 +insert_throughput_vectors_per_sec: 244,770 +delete_elapsed_ms: 10.55 +deleted_vectors: 17,500 (requested 17,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 22.29 +compaction_performed: true +candidate_fragments_before: 8 ([8, 2, 0, 9, 6, 4, 7, 3]) + +Store stats (before -> after): + live_vectors: 32,500 -> 32,500 + total_deleted: 17,500 -> 3,413 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 55,163,624 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 2 + 
total_deleted_vectors: 17,500 -> 3,413 + average_deletion_ratio: 35.00% -> 9.50% + +RUN delete_ratio=0.35 min_del=0.40 max_frag=2 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 35.00% +strategy: min_deletion_ratio=0.4, max_fragments=2, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 161.05 +insert_throughput_vectors_per_sec: 310,459 +delete_elapsed_ms: 6.10 +deleted_vectors: 17,500 (requested 17,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 0.00 +compaction_performed: false +candidate_fragments_before: 0 ([]) + +Store stats (before -> after): + live_vectors: 32,500 -> 32,500 + total_deleted: 17,500 -> 17,500 + fragment_count: 12 -> 12 + bytes_used: 76,806,280 -> 76,806,280 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 10 + total_deleted_vectors: 17,500 -> 17,500 + average_deletion_ratio: 35.00% -> 35.00% + +RUN delete_ratio=0.35 min_del=0.40 max_frag=4 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 35.00% +strategy: min_deletion_ratio=0.4, max_fragments=4, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 161.79 +insert_throughput_vectors_per_sec: 309,039 +delete_elapsed_ms: 7.39 +deleted_vectors: 17,500 (requested 17,500) 
+clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 0.00 +compaction_performed: false +candidate_fragments_before: 0 ([]) + +Store stats (before -> after): + live_vectors: 32,500 -> 32,500 + total_deleted: 17,500 -> 17,500 + fragment_count: 12 -> 12 + bytes_used: 76,806,280 -> 76,806,280 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 10 + total_deleted_vectors: 17,500 -> 17,500 + average_deletion_ratio: 35.00% -> 35.00% + +RUN delete_ratio=0.35 min_del=0.40 max_frag=8 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 35.00% +strategy: min_deletion_ratio=0.4, max_fragments=8, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 153.33 +insert_throughput_vectors_per_sec: 326,099 +delete_elapsed_ms: 8.38 +deleted_vectors: 17,500 (requested 17,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 0.00 +compaction_performed: false +candidate_fragments_before: 0 ([]) + +Store stats (before -> after): + live_vectors: 32,500 -> 32,500 + total_deleted: 17,500 -> 17,500 + fragment_count: 12 -> 12 + bytes_used: 76,806,280 -> 76,806,280 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 10 + total_deleted_vectors: 17,500 -> 17,500 + average_deletion_ratio: 35.00% -> 35.00% + +RUN delete_ratio=0.55 min_del=0.20 max_frag=2 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 
+fragment_target_size: 5,000 +delete_ratio: 55.00% +strategy: min_deletion_ratio=0.2, max_fragments=2, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 140.57 +insert_throughput_vectors_per_sec: 355,707 +delete_elapsed_ms: 11.26 +deleted_vectors: 27,500 (requested 27,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 2.91 +compaction_performed: true +candidate_fragments_before: 2 ([8, 2]) + +Store stats (before -> after): + live_vectors: 22,500 -> 22,500 + total_deleted: 27,500 -> 21,902 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 68,206,496 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 8 + total_deleted_vectors: 27,500 -> 21,902 + average_deletion_ratio: 55.00% -> 49.33% + +RUN delete_ratio=0.55 min_del=0.20 max_frag=4 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 55.00% +strategy: min_deletion_ratio=0.2, max_fragments=4, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 132.17 +insert_throughput_vectors_per_sec: 378,311 +delete_elapsed_ms: 10.07 +deleted_vectors: 27,500 (requested 27,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 4.86 +compaction_performed: true +candidate_fragments_before: 4 ([8, 2, 5, 0]) + +Store stats (before -> after): + live_vectors: 22,500 -> 22,500 + total_deleted: 27,500 -> 16,382 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 59,726,520 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 6 + total_deleted_vectors: 
27,500 -> 16,382 + average_deletion_ratio: 55.00% -> 42.13% + +RUN delete_ratio=0.55 min_del=0.20 max_frag=8 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 55.00% +strategy: min_deletion_ratio=0.2, max_fragments=8, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 135.36 +insert_throughput_vectors_per_sec: 369,392 +delete_elapsed_ms: 9.89 +deleted_vectors: 27,500 (requested 27,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 13.85 +compaction_performed: true +candidate_fragments_before: 8 ([8, 2, 5, 0, 1, 9, 7, 4]) + +Store stats (before -> after): + live_vectors: 22,500 -> 22,500 + total_deleted: 27,500 -> 5,410 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 42,871,016 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 2 + total_deleted_vectors: 27,500 -> 5,410 + average_deletion_ratio: 55.00% -> 19.38% + +RUN delete_ratio=0.55 min_del=0.30 max_frag=2 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 55.00% +strategy: min_deletion_ratio=0.3, max_fragments=2, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 143.90 +insert_throughput_vectors_per_sec: 347,454 +delete_elapsed_ms: 8.24 +deleted_vectors: 27,500 (requested 27,500) +clear_deleted_elapsed_ms: 
0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 2.99 +compaction_performed: true +candidate_fragments_before: 2 ([8, 2]) + +Store stats (before -> after): + live_vectors: 22,500 -> 22,500 + total_deleted: 27,500 -> 21,902 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 68,206,496 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 8 + total_deleted_vectors: 27,500 -> 21,902 + average_deletion_ratio: 55.00% -> 49.33% + +RUN delete_ratio=0.55 min_del=0.30 max_frag=4 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 55.00% +strategy: min_deletion_ratio=0.3, max_fragments=4, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 116.29 +insert_throughput_vectors_per_sec: 429,948 +delete_elapsed_ms: 9.91 +deleted_vectors: 27,500 (requested 27,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 4.33 +compaction_performed: true +candidate_fragments_before: 4 ([8, 2, 5, 0]) + +Store stats (before -> after): + live_vectors: 22,500 -> 22,500 + total_deleted: 27,500 -> 16,382 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 59,726,520 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 6 + total_deleted_vectors: 27,500 -> 16,382 + average_deletion_ratio: 55.00% -> 42.13% + +RUN delete_ratio=0.55 min_del=0.30 max_frag=8 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 
+fragment_target_size: 5,000 +delete_ratio: 55.00% +strategy: min_deletion_ratio=0.3, max_fragments=8, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 119.95 +insert_throughput_vectors_per_sec: 416,844 +delete_elapsed_ms: 6.39 +deleted_vectors: 27,500 (requested 27,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 8.68 +compaction_performed: true +candidate_fragments_before: 8 ([8, 2, 5, 0, 1, 9, 7, 4]) + +Store stats (before -> after): + live_vectors: 22,500 -> 22,500 + total_deleted: 27,500 -> 5,410 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 42,871,016 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 2 + total_deleted_vectors: 27,500 -> 5,410 + average_deletion_ratio: 55.00% -> 19.38% + +RUN delete_ratio=0.55 min_del=0.40 max_frag=2 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 55.00% +strategy: min_deletion_ratio=0.4, max_fragments=2, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 109.13 +insert_throughput_vectors_per_sec: 458,149 +delete_elapsed_ms: 6.05 +deleted_vectors: 27,500 (requested 27,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 1.86 +compaction_performed: true +candidate_fragments_before: 2 ([8, 2]) + +Store stats (before -> after): + live_vectors: 22,500 -> 22,500 + total_deleted: 27,500 -> 21,902 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 68,206,496 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 8 + 
total_deleted_vectors: 27,500 -> 21,902 + average_deletion_ratio: 55.00% -> 49.33% + +RUN delete_ratio=0.55 min_del=0.40 max_frag=4 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 55.00% +strategy: min_deletion_ratio=0.4, max_fragments=4, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 114.19 +insert_throughput_vectors_per_sec: 437,872 +delete_elapsed_ms: 6.10 +deleted_vectors: 27,500 (requested 27,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 5.39 +compaction_performed: true +candidate_fragments_before: 4 ([8, 2, 5, 0]) + +Store stats (before -> after): + live_vectors: 22,500 -> 22,500 + total_deleted: 27,500 -> 16,382 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 59,726,520 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 6 + total_deleted_vectors: 27,500 -> 16,382 + average_deletion_ratio: 55.00% -> 42.13% + +RUN delete_ratio=0.55 min_del=0.40 max_frag=8 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 55.00% +strategy: min_deletion_ratio=0.4, max_fragments=8, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 109.15 +insert_throughput_vectors_per_sec: 458,089 +delete_elapsed_ms: 6.97 +deleted_vectors: 27,500 (requested 27,500) 
+clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 8.83 +compaction_performed: true +candidate_fragments_before: 8 ([8, 2, 5, 0, 1, 9, 7, 4]) + +Store stats (before -> after): + live_vectors: 22,500 -> 22,500 + total_deleted: 27,500 -> 5,410 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 42,871,016 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 2 + total_deleted_vectors: 27,500 -> 5,410 + average_deletion_ratio: 55.00% -> 19.38% + +raw_output=/Users/mask/code/raydb/ray-rs/../docs/benchmarks/results/2026-02-08-vector-compaction-matrix.txt +csv_output=/Users/mask/code/raydb/ray-rs/../docs/benchmarks/results/2026-02-08-vector-compaction-matrix.csv +SUMMARY (mean by strategy): +min_deletion_ratio,max_fragments,runs,mean_compaction_elapsed_ms,mean_reclaim_percent,compaction_performed_ratio +0.20,2,2,6.260,9.230,1.000 +0.20,4,2,9.900,18.280,1.000 +0.20,8,2,27.755,36.180,1.000 +0.30,2,2,6.530,9.230,1.000 +0.30,4,2,9.270,18.280,1.000 +0.30,8,2,15.485,36.180,1.000 +0.40,2,2,0.930,5.600,0.500 +0.40,4,2,2.695,11.120,0.500 +0.40,8,2,4.415,22.090,0.500 diff --git a/docs/benchmarks/results/2026-02-08-vector-compaction-min-vectors-sweep.csv b/docs/benchmarks/results/2026-02-08-vector-compaction-min-vectors-sweep.csv new file mode 100644 index 0000000..2d1f22b --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-vector-compaction-min-vectors-sweep.csv @@ -0,0 +1,7 @@ +delete_ratio,min_vectors_to_compact,compaction_performed,compaction_elapsed_ms,bytes_before,bytes_after,reclaim_percent,fragments_before,fragments_after +0.35,5000,true,8.71,76806280,65807544,14.32,10,6 +0.35,10000,true,8.09,76806280,65807544,14.32,10,6 +0.35,20000,true,7.89,76806280,65807544,14.32,10,6 +0.55,5000,true,4.89,76806280,59726520,22.24,10,6 +0.55,10000,true,9.24,76806280,59726520,22.24,10,6 +0.55,20000,true,5.26,76806280,59726520,22.24,10,6 diff --git a/docs/benchmarks/results/2026-02-08-vector-compaction-min-vectors-sweep.txt 
b/docs/benchmarks/results/2026-02-08-vector-compaction-min-vectors-sweep.txt new file mode 100644 index 0000000..abd62a8 --- /dev/null +++ b/docs/benchmarks/results/2026-02-08-vector-compaction-min-vectors-sweep.txt @@ -0,0 +1,199 @@ +Vector compaction min_vectors sweep +date=2026-02-08 +vectors=50000 dimensions=384 fragment_target_size=5000 +fixed strategy: min_deletion_ratio=0.30 max_fragments=4 +sweep: min_vectors_to_compact in {5000,10000,20000} + +RUN delete_ratio=0.35 min_vectors=5000 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 35.00% +strategy: min_deletion_ratio=0.3, max_fragments=4, min_vectors_to_compact=5,000 +==================================================================================================== +insert_elapsed_ms: 133.94 +insert_throughput_vectors_per_sec: 373,299 +delete_elapsed_ms: 6.80 +deleted_vectors: 17,500 (requested 17,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 8.71 +compaction_performed: true +candidate_fragments_before: 4 ([8, 2, 0, 9]) + +Store stats (before -> after): + live_vectors: 32,500 -> 32,500 + total_deleted: 17,500 -> 10,341 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 65,807,544 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 6 + total_deleted_vectors: 17,500 -> 10,341 + average_deletion_ratio: 35.00% -> 24.14% + +RUN delete_ratio=0.35 min_vectors=10000 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 
+delete_ratio: 35.00% +strategy: min_deletion_ratio=0.3, max_fragments=4, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 149.68 +insert_throughput_vectors_per_sec: 334,051 +delete_elapsed_ms: 7.55 +deleted_vectors: 17,500 (requested 17,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 8.09 +compaction_performed: true +candidate_fragments_before: 4 ([8, 2, 0, 9]) + +Store stats (before -> after): + live_vectors: 32,500 -> 32,500 + total_deleted: 17,500 -> 10,341 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 65,807,544 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 6 + total_deleted_vectors: 17,500 -> 10,341 + average_deletion_ratio: 35.00% -> 24.14% + +RUN delete_ratio=0.35 min_vectors=20000 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 35.00% +strategy: min_deletion_ratio=0.3, max_fragments=4, min_vectors_to_compact=20,000 +==================================================================================================== +insert_elapsed_ms: 144.54 +insert_throughput_vectors_per_sec: 345,919 +delete_elapsed_ms: 5.13 +deleted_vectors: 17,500 (requested 17,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 7.89 +compaction_performed: true +candidate_fragments_before: 4 ([8, 2, 0, 9]) + +Store stats (before -> after): + live_vectors: 32,500 -> 32,500 + total_deleted: 17,500 -> 10,341 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 65,807,544 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 6 + total_deleted_vectors: 17,500 -> 10,341 + 
average_deletion_ratio: 35.00% -> 24.14% + +RUN delete_ratio=0.55 min_vectors=5000 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 55.00% +strategy: min_deletion_ratio=0.3, max_fragments=4, min_vectors_to_compact=5,000 +==================================================================================================== +insert_elapsed_ms: 146.31 +insert_throughput_vectors_per_sec: 341,730 +delete_elapsed_ms: 8.65 +deleted_vectors: 27,500 (requested 27,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 4.89 +compaction_performed: true +candidate_fragments_before: 4 ([8, 2, 5, 0]) + +Store stats (before -> after): + live_vectors: 22,500 -> 22,500 + total_deleted: 27,500 -> 16,382 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 59,726,520 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 6 + total_deleted_vectors: 27,500 -> 16,382 + average_deletion_ratio: 55.00% -> 42.13% + +RUN delete_ratio=0.55 min_vectors=10000 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 55.00% +strategy: min_deletion_ratio=0.3, max_fragments=4, min_vectors_to_compact=10,000 +==================================================================================================== +insert_elapsed_ms: 210.47 +insert_throughput_vectors_per_sec: 237,560 +delete_elapsed_ms: 17.35 +deleted_vectors: 27,500 (requested 27,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 
+compaction_elapsed_ms: 9.24 +compaction_performed: true +candidate_fragments_before: 4 ([8, 2, 5, 0]) + +Store stats (before -> after): + live_vectors: 22,500 -> 22,500 + total_deleted: 27,500 -> 16,382 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 59,726,520 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 6 + total_deleted_vectors: 27,500 -> 16,382 + average_deletion_ratio: 55.00% -> 42.13% + +RUN delete_ratio=0.55 min_vectors=20000 +==================================================================================================== +Vector Compaction Strategy Benchmark (Rust) +==================================================================================================== +vectors: 50,000 +dimensions: 384 +fragment_target_size: 5,000 +delete_ratio: 55.00% +strategy: min_deletion_ratio=0.3, max_fragments=4, min_vectors_to_compact=20,000 +==================================================================================================== +insert_elapsed_ms: 171.76 +insert_throughput_vectors_per_sec: 291,109 +delete_elapsed_ms: 5.98 +deleted_vectors: 27,500 (requested 27,500) +clear_deleted_elapsed_ms: 0.00 +cleared_fragments: 0 +compaction_elapsed_ms: 5.26 +compaction_performed: true +candidate_fragments_before: 4 ([8, 2, 5, 0]) + +Store stats (before -> after): + live_vectors: 22,500 -> 22,500 + total_deleted: 27,500 -> 16,382 + fragment_count: 12 -> 13 + bytes_used: 76,806,280 -> 59,726,520 + +Compaction stats (before -> after): + fragments_needing_compaction: 10 -> 6 + total_deleted_vectors: 27,500 -> 16,382 + average_deletion_ratio: 55.00% -> 42.13% + +raw_output=../docs/benchmarks/results/2026-02-08-vector-compaction-min-vectors-sweep.txt +csv_output=../docs/benchmarks/results/2026-02-08-vector-compaction-min-vectors-sweep.csv +SUMMARY (mean by min_vectors_to_compact): +min_vectors_to_compact,runs,mean_compaction_elapsed_ms,mean_reclaim_percent,compaction_performed_ratio +5000,2,6.800,18.280,1.000 
+10000,2,8.665,18.280,1.000 +20000,2,6.575,18.280,1.000 diff --git a/docs/bindings-parity.md b/docs/bindings-parity.md index 51fc0a1..6bb3002 100644 --- a/docs/bindings-parity.md +++ b/docs/bindings-parity.md @@ -29,6 +29,7 @@ Legend | Vector PropValue | Full | Missing | Full | Python bindings do not expose PropValue VectorF32 | | Schema IDs/labels | Full | Full | Full | Labels, edge types, prop keys | | Cache API | Full | Full | Full | Python/NAPI include extra cache control | +| Replication controls + status (Phase D) | Full | Full | Full | Promote, retention, reseed, token wait, primary/replica status | | Integrity check | Full | Missing | Full | Single-file uses full snapshot check | | Optimize/compact | Full | Partial | Full | Single-file checkpoint + vacuum/options exposed | | Vector embeddings | Full | Full | Full | `set/get/del/has` node vectors | diff --git a/docs/examples/replication_adapter_generic_middleware.ts b/docs/examples/replication_adapter_generic_middleware.ts new file mode 100644 index 0000000..79528a7 --- /dev/null +++ b/docs/examples/replication_adapter_generic_middleware.ts @@ -0,0 +1,144 @@ +/** + * Host-runtime replication HTTP adapter template (generic middleware). 
+ * + * Purpose: + * - framework-agnostic route handler factory + * - plug into Express/Fastify/Hono/Elysia adapters + * - reuse transport JSON helpers from `ray-rs/ts/replication_transport.ts` + */ + +import { + createReplicationTransportAdapter, + type ReplicationLogTransportOptions, + type ReplicationTransportAdapter, +} from '../../ray-rs/ts/replication_transport' +import type { Database } from '../../ray-rs/index' + +type RequestLike = { + method: string + path: string + query: Record + headers: Record + body?: unknown +} + +type ResponseLike = { + status: number + headers?: Record + body: unknown +} + +type RequireAdmin = (request: RequestLike) => void + +function parseBool(raw: string | undefined, fallback: boolean): boolean { + if (raw === undefined) return fallback + const normalized = raw.trim().toLowerCase() + if (normalized === '1' || normalized === 'true' || normalized === 'yes') return true + if (normalized === '0' || normalized === 'false' || normalized === 'no') return false + return fallback +} + +function parsePositiveInt(raw: string | undefined, fallback: number, max: number): number { + if (raw === undefined || raw.trim() === '') return fallback + const parsed = Number(raw) + if (!Number.isFinite(parsed)) return fallback + return Math.min(Math.max(Math.floor(parsed), 1), max) +} + +export function createReplicationMiddleware( + db: Database, + requireAdmin: RequireAdmin, +): (request: RequestLike) => ResponseLike { + const adapter: ReplicationTransportAdapter = createReplicationTransportAdapter(db) + + return (request: RequestLike): ResponseLike => { + const path = request.path + try { + if (path === '/replication/status') { + return { + status: 200, + body: { + primary: db.primaryReplicationStatus(), + replica: db.replicaReplicationStatus(), + }, + } + } + + if (path === '/replication/metrics/prometheus') { + requireAdmin(request) + return { + status: 200, + headers: { 'content-type': 'text/plain; charset=utf-8' }, + body: 
adapter.metricsPrometheus(), + } + } + + if (path === '/replication/metrics/otel-json') { + requireAdmin(request) + return { status: 200, body: JSON.parse(adapter.metricsOtelJson()) } + } + + if (path === '/replication/transport/snapshot') { + requireAdmin(request) + const includeData = parseBool(request.query.includeData, false) + return { status: 200, body: adapter.snapshot(includeData) } + } + + if (path === '/replication/transport/log') { + requireAdmin(request) + const options: ReplicationLogTransportOptions = { + cursor: request.query.cursor ?? null, + maxFrames: parsePositiveInt(request.query.maxFrames, 128, 10_000), + maxBytes: parsePositiveInt(request.query.maxBytes, 1024 * 1024, 32 * 1024 * 1024), + includePayload: parseBool(request.query.includePayload, true), + } + return { status: 200, body: adapter.log(options) } + } + + if (path === '/replication/pull' && request.method === 'POST') { + requireAdmin(request) + const maxFrames = Number( + (request.body as { maxFrames?: number } | undefined)?.maxFrames ?? 256, + ) + const appliedFrames = db.replicaCatchUpOnce(Math.max(1, maxFrames)) + return { + status: 200, + body: { appliedFrames, replica: db.replicaReplicationStatus() }, + } + } + + if (path === '/replication/reseed' && request.method === 'POST') { + requireAdmin(request) + db.replicaReseedFromSnapshot() + return { status: 200, body: { replica: db.replicaReplicationStatus() } } + } + + if (path === '/replication/promote' && request.method === 'POST') { + requireAdmin(request) + const epoch = db.primaryPromoteToNextEpoch() + return { + status: 200, + body: { epoch, primary: db.primaryReplicationStatus() }, + } + } + + return { status: 404, body: { error: 'not found' } } + } catch (error) { + return { + status: 500, + body: { error: error instanceof Error ? error.message : String(error) }, + } + } + } +} + +/** + * Example auth callback: + * const token = process.env.REPLICATION_ADMIN_TOKEN ?? 
'' + * const requireAdmin: RequireAdmin = (request) => { + * if (!token) return + * if (request.headers.authorization !== `Bearer ${token}`) { + * throw new Error('unauthorized') + * } + * } + */ diff --git a/docs/examples/replication_adapter_node_express.ts b/docs/examples/replication_adapter_node_express.ts new file mode 100644 index 0000000..a081019 --- /dev/null +++ b/docs/examples/replication_adapter_node_express.ts @@ -0,0 +1,186 @@ +/** + * Host-runtime replication HTTP adapter (Node + Express). + * + * Purpose: + * - production-style non-playground embedding + * - end-to-end status/admin/transport wiring + * - token + optional Node TLS mTLS auth via helper APIs + * + * Run: + * npm i express + * export REPLICATION_ADMIN_AUTH_MODE=token_or_mtls + * export REPLICATION_ADMIN_TOKEN=change-me + * tsx replication_adapter_node_express.ts + */ + +import express, { type Request, type Response } from 'express' + +import { Database } from '../../ray-rs/index' +import { + createNodeTlsMtlsMatcher, + createReplicationAdminAuthorizer, + createReplicationTransportAdapter, + type ReplicationAdminAuthMode, + type ReplicationAdminAuthRequest, + type ReplicationTransportAdapter, +} from '../../ray-rs/ts/replication_transport' + +type RequestLike = ReplicationAdminAuthRequest & { + socket?: { authorized?: boolean } + client?: { authorized?: boolean } + raw?: { socket?: { authorized?: boolean } } + req?: { socket?: { authorized?: boolean } } +} + +function parseBool(raw: unknown, fallback: boolean): boolean { + if (raw === undefined || raw === null) return fallback + const normalized = String(raw).trim().toLowerCase() + if (['1', 'true', 'yes'].includes(normalized)) return true + if (['0', 'false', 'no'].includes(normalized)) return false + return fallback +} + +function parsePositiveInt(raw: unknown, fallback: number, max: number): number { + if (raw === undefined || raw === null) return fallback + const parsed = Number(raw) + if (!Number.isFinite(parsed)) return fallback + 
return Math.min(Math.max(Math.floor(parsed), 1), max) +} + +const DB_PATH = process.env.KITEDB_PATH ?? 'cluster-primary.kitedb' +const SIDECAR_PATH = process.env.KITEDB_REPLICATION_SIDECAR ?? 'cluster-primary.sidecar' +const PORT = parsePositiveInt(process.env.PORT, 8080, 65535) +const AUTH_MODE = + (process.env.REPLICATION_ADMIN_AUTH_MODE as ReplicationAdminAuthMode | undefined) ?? + 'token_or_mtls' +const AUTH_TOKEN = process.env.REPLICATION_ADMIN_TOKEN ?? '' + +const db = Database.open(DB_PATH, { + replicationRole: 'Primary', + replicationSidecarPath: SIDECAR_PATH, +}) + +const adapter: ReplicationTransportAdapter = createReplicationTransportAdapter(db) +const requireAdmin = createReplicationAdminAuthorizer({ + mode: AUTH_MODE, + token: AUTH_TOKEN, + mtlsMatcher: createNodeTlsMtlsMatcher({ requirePeerCertificate: false }), +}) + +const app = express() +app.use(express.json({ limit: '2mb' })) + +function checked(handler: (req: Request, res: Response) => void) { + return (req: Request, res: Response) => { + try { + handler(req, res) + } catch (error) { + res.status(500).json({ error: error instanceof Error ? 
error.message : String(error) }) + } + } +} + +function ensureAdmin(req: Request): void { + requireAdmin({ + headers: req.headers as Record, + socket: req.socket as RequestLike['socket'], + client: (req as unknown as { client?: RequestLike['client'] }).client, + raw: (req as unknown as { raw?: RequestLike['raw'] }).raw, + req: (req as unknown as { req?: RequestLike['req'] }).req, + }) +} + +app.get( + '/replication/status', + checked((_req, res) => { + res.json({ + primary: db.primaryReplicationStatus(), + replica: db.replicaReplicationStatus(), + }) + }), +) + +app.get( + '/replication/metrics/prometheus', + checked((req, res) => { + ensureAdmin(req) + res.type('text/plain').send(adapter.metricsPrometheus()) + }), +) + +app.get( + '/replication/metrics/otel-json', + checked((req, res) => { + ensureAdmin(req) + res.json(JSON.parse(adapter.metricsOtelJson())) + }), +) + +app.get( + '/replication/transport/snapshot', + checked((req, res) => { + ensureAdmin(req) + const includeData = parseBool(req.query.includeData, false) + res.json(adapter.snapshot(includeData)) + }), +) + +app.get( + '/replication/transport/log', + checked((req, res) => { + ensureAdmin(req) + res.json( + adapter.log({ + cursor: (req.query.cursor as string | undefined) ?? 
null, + maxFrames: parsePositiveInt(req.query.maxFrames, 128, 10_000), + maxBytes: parsePositiveInt(req.query.maxBytes, 1024 * 1024, 32 * 1024 * 1024), + includePayload: parseBool(req.query.includePayload, true), + }), + ) + }), +) + +app.post( + '/replication/pull', + checked((req, res) => { + ensureAdmin(req) + const maxFrames = parsePositiveInt(req.body?.maxFrames, 256, 100_000) + const appliedFrames = db.replicaCatchUpOnce(maxFrames) + res.json({ appliedFrames, replica: db.replicaReplicationStatus() }) + }), +) + +app.post( + '/replication/reseed', + checked((req, res) => { + ensureAdmin(req) + db.replicaReseedFromSnapshot() + res.json({ replica: db.replicaReplicationStatus() }) + }), +) + +app.post( + '/replication/promote', + checked((req, res) => { + ensureAdmin(req) + const epoch = db.primaryPromoteToNextEpoch() + res.json({ epoch, primary: db.primaryReplicationStatus() }) + }), +) + +const server = app.listen(PORT, () => { + // eslint-disable-next-line no-console + console.log(`replication adapter listening on http://127.0.0.1:${PORT}`) +}) + +function shutdown() { + server.close(() => { + try { + db.close() + } catch {} + process.exit(0) + }) +} + +process.on('SIGINT', shutdown) +process.on('SIGTERM', shutdown) diff --git a/docs/examples/replication_adapter_node_proxy_forwarded.ts b/docs/examples/replication_adapter_node_proxy_forwarded.ts new file mode 100644 index 0000000..22126e4 --- /dev/null +++ b/docs/examples/replication_adapter_node_proxy_forwarded.ts @@ -0,0 +1,186 @@ +/** + * Host-runtime replication HTTP adapter (Node + Express behind reverse proxy). 
+ * + * Purpose: + * - production-style embedding when TLS/mTLS terminates at ingress/proxy + * - forwarded-header mTLS verification + optional token auth + * - end-to-end status/admin/transport wiring + * + * Run: + * npm i express + * export REPLICATION_ADMIN_AUTH_MODE=token_or_mtls + * export REPLICATION_ADMIN_TOKEN=change-me + * export REPLICATION_MTLS_SUBJECT_REGEX='^CN=replication-admin,' + * tsx replication_adapter_node_proxy_forwarded.ts + */ + +import express, { type Request, type Response } from 'express' + +import { Database } from '../../ray-rs/index' +import { + createForwardedTlsMtlsMatcher, + createReplicationAdminAuthorizer, + createReplicationTransportAdapter, + type ReplicationAdminAuthMode, + type ReplicationAdminAuthRequest, + type ReplicationForwardedMtlsMatcherOptions, + type ReplicationTransportAdapter, +} from '../../ray-rs/ts/replication_transport' + +function parseBool(raw: unknown, fallback: boolean): boolean { + if (raw === undefined || raw === null) return fallback + const normalized = String(raw).trim().toLowerCase() + if (['1', 'true', 'yes'].includes(normalized)) return true + if (['0', 'false', 'no'].includes(normalized)) return false + return fallback +} + +function parsePositiveInt(raw: unknown, fallback: number, max: number): number { + if (raw === undefined || raw === null) return fallback + const parsed = Number(raw) + if (!Number.isFinite(parsed)) return fallback + return Math.min(Math.max(Math.floor(parsed), 1), max) +} + +function readHeader(headers: Record, name: string): string | null { + const direct = headers[name] + if (direct && direct.trim().length > 0) return direct.trim() + const target = name.toLowerCase() + for (const [key, value] of Object.entries(headers)) { + if (key.toLowerCase() !== target) continue + if (typeof value !== 'string') continue + const trimmed = value.trim() + if (trimmed.length > 0) return trimmed + } + return null +} + +const DB_PATH = process.env.KITEDB_PATH ?? 
'cluster-primary.kitedb' +const SIDECAR_PATH = process.env.KITEDB_REPLICATION_SIDECAR ?? 'cluster-primary.sidecar' +const PORT = parsePositiveInt(process.env.PORT, 8081, 65535) +const AUTH_MODE = + (process.env.REPLICATION_ADMIN_AUTH_MODE as ReplicationAdminAuthMode | undefined) ?? + 'token_or_mtls' +const AUTH_TOKEN = process.env.REPLICATION_ADMIN_TOKEN ?? '' +const CERT_HEADER = (process.env.REPLICATION_MTLS_HEADER ?? 'x-forwarded-client-cert') + .trim() + .toLowerCase() +const SUBJECT_REGEX = process.env.REPLICATION_MTLS_SUBJECT_REGEX + ? new RegExp(process.env.REPLICATION_MTLS_SUBJECT_REGEX) + : null + +const db = Database.open(DB_PATH, { + replicationRole: 'Primary', + replicationSidecarPath: SIDECAR_PATH, +}) + +const adapter: ReplicationTransportAdapter = createReplicationTransportAdapter(db) +const forwardedMatcherOptions: ReplicationForwardedMtlsMatcherOptions = { + requireVerifyHeader: true, + requirePeerCertificate: true, + verifyHeaders: ['x-client-verify', 'ssl-client-verify'], + certHeaders: [CERT_HEADER, 'x-client-cert'], + successValues: ['success', 'verified', 'true', '1'], +} +const forwardedMatcher = createForwardedTlsMtlsMatcher(forwardedMatcherOptions) + +const requireAdmin = createReplicationAdminAuthorizer({ + mode: AUTH_MODE, + token: AUTH_TOKEN, + mtlsMatcher: (request) => { + const forwardedOk = forwardedMatcher(request) + if (!forwardedOk) return false + if (!SUBJECT_REGEX) return true + const certValue = readHeader(request.headers ?? {}, CERT_HEADER) + if (!certValue) return false + return SUBJECT_REGEX.test(certValue) + }, +}) + +const app = express() +app.set('trust proxy', true) +app.use(express.json({ limit: '2mb' })) + +function checked(handler: (req: Request, res: Response) => void) { + return (req: Request, res: Response) => { + try { + handler(req, res) + } catch (error) { + res.status(500).json({ error: error instanceof Error ? 
error.message : String(error) }) + } + } +} + +function ensureAdmin(req: Request): void { + requireAdmin({ + headers: req.headers as Record, + }) +} + +app.get( + '/replication/status', + checked((_req, res) => { + res.json({ + primary: db.primaryReplicationStatus(), + replica: db.replicaReplicationStatus(), + }) + }), +) + +app.get( + '/replication/metrics/prometheus', + checked((req, res) => { + ensureAdmin(req) + res.type('text/plain').send(adapter.metricsPrometheus()) + }), +) + +app.get( + '/replication/transport/snapshot', + checked((req, res) => { + ensureAdmin(req) + const includeData = parseBool(req.query.includeData, false) + res.json(adapter.snapshot(includeData)) + }), +) + +app.get( + '/replication/transport/log', + checked((req, res) => { + ensureAdmin(req) + res.json( + adapter.log({ + cursor: (req.query.cursor as string | undefined) ?? null, + maxFrames: parsePositiveInt(req.query.maxFrames, 128, 10_000), + maxBytes: parsePositiveInt(req.query.maxBytes, 1024 * 1024, 32 * 1024 * 1024), + includePayload: parseBool(req.query.includePayload, true), + }), + ) + }), +) + +app.post( + '/replication/promote', + checked((req, res) => { + ensureAdmin(req) + const epoch = db.primaryPromoteToNextEpoch() + res.json({ epoch, primary: db.primaryReplicationStatus() }) + }), +) + +const server = app.listen(PORT, () => { + // eslint-disable-next-line no-console + console.log(`proxy-forwarded replication adapter listening on http://127.0.0.1:${PORT}`) +}) + +function shutdown() { + server.close(() => { + try { + db.close() + } catch {} + process.exit(0) + }) +} + +process.on('SIGINT', shutdown) +process.on('SIGTERM', shutdown) diff --git a/docs/examples/replication_adapter_python_fastapi.py b/docs/examples/replication_adapter_python_fastapi.py new file mode 100644 index 0000000..a419d93 --- /dev/null +++ b/docs/examples/replication_adapter_python_fastapi.py @@ -0,0 +1,167 @@ +""" +Host-runtime replication HTTP adapter template (Python + FastAPI). 
+ +Purpose: +- expose replication admin/transport endpoints outside playground runtime +- reuse kitedb host-runtime APIs directly +- include token/mTLS auth parity helper for host adapters + +Run: + pip install fastapi uvicorn kitedb + export REPLICATION_ADMIN_TOKEN=change-me + export REPLICATION_ADMIN_AUTH_MODE=token_or_mtls + uvicorn replication_adapter_python_fastapi:app --host 0.0.0.0 --port 8080 +""" + +from __future__ import annotations + +import json +import os +from dataclasses import dataclass +from typing import Any, Optional + +from fastapi import Depends, FastAPI, HTTPException, Query, Request +from fastapi.responses import PlainTextResponse +from pydantic import BaseModel + +from kitedb import ( + AsgiMtlsMatcherOptions, + Database, + OpenOptions, + ReplicationAdminAuthConfig, + collect_replication_log_transport_json, + collect_replication_metrics_otel_json, + collect_replication_metrics_prometheus, + collect_replication_snapshot_transport_json, + create_asgi_tls_mtls_matcher, + create_replication_admin_authorizer, +) + + +@dataclass(frozen=True) +class Settings: + db_path: str = os.environ.get("KITEDB_PATH", "cluster-primary.kitedb") + replication_admin_auth_mode: str = os.environ.get( + "REPLICATION_ADMIN_AUTH_MODE", "token_or_mtls" + ) + replication_admin_token: str = os.environ.get("REPLICATION_ADMIN_TOKEN", "") + replication_mtls_header: str = os.environ.get( + "REPLICATION_MTLS_HEADER", "x-forwarded-client-cert" + ) + replication_mtls_subject_regex: str = os.environ.get( + "REPLICATION_MTLS_SUBJECT_REGEX", "" + ) + + +SETTINGS = Settings() +DB = Database( + SETTINGS.db_path, + OpenOptions( + replication_role="primary", + replication_sidecar_path=os.environ.get( + "KITEDB_REPLICATION_SIDECAR", + "cluster-primary.sidecar", + ), + ), +) + +app = FastAPI(title="kitedb-replication-adapter") + + +_ADMIN_AUTH = ReplicationAdminAuthConfig( + mode=SETTINGS.replication_admin_auth_mode, # type: ignore[arg-type] + token=SETTINGS.replication_admin_token, + 
mtls_header=SETTINGS.replication_mtls_header, + mtls_subject_regex=SETTINGS.replication_mtls_subject_regex or None, + mtls_matcher=create_asgi_tls_mtls_matcher( + AsgiMtlsMatcherOptions(require_peer_certificate=False) + ), +) +_REQUIRE_ADMIN = create_replication_admin_authorizer(_ADMIN_AUTH) + + +def _require_admin(request: Request) -> None: + try: + _REQUIRE_ADMIN(request) + except PermissionError as error: + raise HTTPException(status_code=401, detail=str(error)) from error + + +def _json_loads(raw: str, label: str) -> Any: + try: + return json.loads(raw) + except json.JSONDecodeError as error: + raise HTTPException( + status_code=500, + detail=f"invalid {label} payload: {error}", + ) from error + + +class PullRequest(BaseModel): + max_frames: int = 256 + + +@app.get("/replication/status") +def replication_status() -> dict[str, Any]: + return { + "primary": DB.primary_replication_status(), + "replica": DB.replica_replication_status(), + } + + +@app.get("/replication/metrics/prometheus", response_class=PlainTextResponse) +def replication_metrics_prometheus(_: None = Depends(_require_admin)) -> str: + return collect_replication_metrics_prometheus(DB) + + +@app.get("/replication/metrics/otel-json") +def replication_metrics_otel_json(_: None = Depends(_require_admin)) -> Any: + return _json_loads(collect_replication_metrics_otel_json(DB), "otel-json") + + +@app.get("/replication/transport/snapshot") +def replication_snapshot_transport( + include_data: bool = Query(default=False), + _: None = Depends(_require_admin), +) -> Any: + raw = collect_replication_snapshot_transport_json(DB, include_data=include_data) + return _json_loads(raw, "snapshot transport") + + +@app.get("/replication/transport/log") +def replication_log_transport( + cursor: Optional[str] = Query(default=None), + max_frames: int = Query(default=128, ge=1, le=10_000), + max_bytes: int = Query(default=1_048_576, ge=1, le=32 * 1024 * 1024), + include_payload: bool = Query(default=True), + _: None = 
Depends(_require_admin), +) -> Any: + raw = collect_replication_log_transport_json( + DB, + cursor=cursor, + max_frames=max_frames, + max_bytes=max_bytes, + include_payload=include_payload, + ) + return _json_loads(raw, "log transport") + + +@app.post("/replication/pull") +def replication_pull(body: PullRequest, _: None = Depends(_require_admin)) -> dict[str, Any]: + applied = DB.replica_catch_up_once(body.max_frames) + return { + "applied_frames": applied, + "replica": DB.replica_replication_status(), + } + + +@app.post("/replication/reseed") +def replication_reseed(_: None = Depends(_require_admin)) -> dict[str, Any]: + DB.replica_reseed_from_snapshot() + return {"replica": DB.replica_replication_status()} + + +@app.post("/replication/promote") +def replication_promote(_: None = Depends(_require_admin)) -> dict[str, Any]: + epoch = DB.primary_promote_to_next_epoch() + return {"epoch": epoch, "primary": DB.primary_replication_status()} diff --git a/playground/PLAN.md b/playground/PLAN.md index 3efb728..2afce0f 100644 --- a/playground/PLAN.md +++ b/playground/PLAN.md @@ -166,7 +166,14 @@ playground/ ```typescript // Database Management GET /api/status → { connected: boolean, path?: string, nodeCount?: number, edgeCount?: number } -POST /api/db/open ← { path: string } → { success: boolean, error?: string } +GET /api/replication/status → { connected: boolean, role: "primary"|"replica"|"disabled", primary?: ..., replica?: ... } +GET /api/replication/metrics → text/plain (Prometheus exposition format) +GET /api/replication/snapshot/latest → { success: boolean, snapshot?: { byteLength, sha256, ... } } +GET /api/replication/log?cursor=...&maxBytes=...&maxFrames=... → { success: boolean, frames: [...], nextCursor, eof } +POST /api/replication/pull ← { maxFrames?: number } → { success: boolean, appliedFrames?: number, replica?: ... } +POST /api/replication/reseed → { success: boolean, replica?: ... 
} +POST /api/replication/promote → { success: boolean, epoch?: number, primary?: ... } +POST /api/db/open ← { path: string, options?: { readOnly?, syncMode?, replicationRole?, ... } } → { success: boolean, error?: string } POST /api/db/upload ← FormData (file) → { success: boolean, error?: string } POST /api/db/demo → { success: boolean } POST /api/db/close → { success: boolean } @@ -193,6 +200,19 @@ POST /api/graph/path ← { startKey: string, endKey: string } → { pat POST /api/graph/impact ← { nodeKey: string } → { impacted: string[], edges: string[] } ``` +Replication admin auth: +- Auth mode envs: + - `REPLICATION_ADMIN_AUTH_MODE` = `none|token|mtls|token_or_mtls|token_and_mtls` + - `REPLICATION_ADMIN_TOKEN` for token modes + - `REPLICATION_MTLS_HEADER` (default `x-forwarded-client-cert`) for mTLS modes + - `REPLICATION_MTLS_SUBJECT_REGEX` optional subject filter for mTLS modes + - `REPLICATION_MTLS_NATIVE_TLS=true` to treat native HTTPS + client-cert verification as mTLS auth + - `PLAYGROUND_TLS_CERT_FILE` + `PLAYGROUND_TLS_KEY_FILE` enable HTTPS listener + - `PLAYGROUND_TLS_CA_FILE` optional custom client-cert CA bundle + - `PLAYGROUND_TLS_REQUEST_CERT` + `PLAYGROUND_TLS_REJECT_UNAUTHORIZED` for TLS client-cert enforcement +- Admin endpoints (`/snapshot/latest`, `/metrics`, `/log`, `/pull`, `/reseed`, `/promote`) enforce the selected mode. +- `/api/replication/status` remains readable without auth. 
+ --- ## Node/Edge Visualization Format diff --git a/playground/package.json b/playground/package.json index 9dbe234..441921a 100644 --- a/playground/package.json +++ b/playground/package.json @@ -6,7 +6,8 @@ "scripts": { "dev": "bun run --watch src/server.ts", "start": "bun run src/server.ts", - "build": "bun run build.ts" + "build": "bun run build.ts", + "test": "bun test" }, "dependencies": { "elysia": "^1.2.0", diff --git a/playground/src/api/db.ts b/playground/src/api/db.ts index be6b00a..8155c77 100644 --- a/playground/src/api/db.ts +++ b/playground/src/api/db.ts @@ -8,12 +8,13 @@ import { tmpdir } from "node:os"; import { join } from "node:path"; import { type Kite, + type KiteOptions, defineEdge, defineNode, kite, optional, prop, -} from "../../../src/index.ts"; +} from "../../../ray-rs/ts/index.ts"; import { createDemoGraph } from "./demo-data.ts"; import { mkdtemp, rm, writeFile } from "node:fs/promises"; @@ -75,16 +76,19 @@ interface DbState { let currentDb: DbState | null = null; +export type PlaygroundOpenOptions = Omit; + /** * Open a database from a file path */ export async function openDatabase( path: string, + options?: PlaygroundOpenOptions, ): Promise<{ success: boolean; error?: string }> { try { await closeDatabase(); - const db = await kite(path, { nodes, edges }); + const db = await kite(path, { nodes, edges, ...(options ?? 
{}) }); currentDb = { db, path, isDemo: false }; return { success: true }; diff --git a/playground/src/api/routes.replication.test.ts b/playground/src/api/routes.replication.test.ts new file mode 100644 index 0000000..b9270d3 --- /dev/null +++ b/playground/src/api/routes.replication.test.ts @@ -0,0 +1,1325 @@ +import { afterEach, beforeAll, describe, expect, test } from "bun:test"; +import { createHash } from "node:crypto"; +import { mkdtemp, readFile, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +process.env.REPLICATION_ADMIN_TOKEN = "test-repl-admin-token"; + +const { Elysia } = await import("elysia"); +const { apiRoutes } = await import("./routes.ts"); +const { closeDatabase, getDb, FileNode } = await import("./db.ts"); + +const AUTH_HEADER = { + Authorization: `Bearer ${process.env.REPLICATION_ADMIN_TOKEN}`, +}; + +interface JsonResponse> { + status: number; + body: T; +} + +interface TextResponse { + status: number; + body: string; +} + +interface ManifestEnvelope { + version: number; + payload_crc32: number; + manifest: { + version: number; + epoch: number; + head_log_index: number; + retained_floor: number; + active_segment_id: number; + segments: Array<{ + id: number; + start_log_index: number; + end_log_index: number; + size_bytes: number; + }>; + }; +} + +let app: InstanceType; +let tempDir: string; +let dbPath: string; + +type ReplicationAuthEnvKey = + | "REPLICATION_ADMIN_TOKEN" + | "REPLICATION_ADMIN_AUTH_MODE" + | "REPLICATION_MTLS_HEADER" + | "REPLICATION_MTLS_SUBJECT_REGEX" + | "REPLICATION_MTLS_NATIVE_TLS" + | "PLAYGROUND_TLS_REQUEST_CERT" + | "PLAYGROUND_TLS_REJECT_UNAUTHORIZED"; + +async function withReplicationAuthEnv( + overrides: Partial>, + run: () => Promise, +): Promise { + const keys: ReplicationAuthEnvKey[] = [ + "REPLICATION_ADMIN_TOKEN", + "REPLICATION_ADMIN_AUTH_MODE", + "REPLICATION_MTLS_HEADER", + "REPLICATION_MTLS_SUBJECT_REGEX", + "REPLICATION_MTLS_NATIVE_TLS", + 
"PLAYGROUND_TLS_REQUEST_CERT", + "PLAYGROUND_TLS_REJECT_UNAUTHORIZED", + ]; + const previous: Partial> = {}; + for (const key of keys) { + previous[key] = process.env[key]; + } + + for (const [key, value] of Object.entries(overrides) as Array< + [ReplicationAuthEnvKey, string | null] + >) { + if (value === null) { + delete process.env[key]; + } else { + process.env[key] = value; + } + } + + try { + return await run(); + } finally { + for (const key of keys) { + const value = previous[key]; + if (value === undefined) { + delete process.env[key]; + } else { + process.env[key] = value; + } + } + } +} + +async function requestJson>( + method: string, + path: string, + body?: unknown, + headers?: Record, + origin = "http://localhost", +): Promise> { + const request = new Request(`${origin}${path}`, { + method, + headers: { + ...(body !== undefined ? { "content-type": "application/json" } : {}), + ...(headers ?? {}), + }, + body: body !== undefined ? JSON.stringify(body) : undefined, + }); + + const response = await app.handle(request); + return { + status: response.status, + body: (await response.json()) as T, + }; +} + +async function requestText( + method: string, + path: string, + body?: unknown, + headers?: Record, + origin = "http://localhost", +): Promise { + const request = new Request(`${origin}${path}`, { + method, + headers: { + ...(body !== undefined ? { "content-type": "application/json" } : {}), + ...(headers ?? {}), + }, + body: body !== undefined ? 
JSON.stringify(body) : undefined, + }); + + const response = await app.handle(request); + return { + status: response.status, + body: await response.text(), + }; +} + +async function openPrimary(): Promise { + tempDir = await mkdtemp(join(tmpdir(), "playground-repl-test-")); + dbPath = join(tempDir, "primary.kitedb"); + + const response = await requestJson<{ success: boolean; error?: string }>( + "POST", + "/api/db/open", + { + path: dbPath, + options: { + replicationRole: "primary", + }, + }, + ); + + expect(response.status).toBe(200); + expect(response.body.success).toBe(true); +} + +async function appendCommits(count: number): Promise { + const db = getDb(); + expect(db).not.toBeNull(); + for (let i = 0; i < count; i++) { + await db! + .insert(FileNode) + .values({ + key: `src/file-${i}.ts`, + path: `src/file-${i}.ts`, + language: "typescript", + }) + .returning(); + } +} + +beforeAll(() => { + app = new Elysia().use(apiRoutes); +}); + +afterEach(async () => { + await closeDatabase(); + if (tempDir) { + await rm(tempDir, { recursive: true, force: true }); + } +}); + +describe("replication log endpoints", () => { + test("paginates log frames using maxFrames + nextCursor", async () => { + await openPrimary(); + await appendCommits(5); + + const first = await requestJson<{ + success: boolean; + frameCount: number; + eof: boolean; + nextCursor: string | null; + frames: Array<{ logIndex: string }>; + }>("GET", "/api/replication/log?maxFrames=2", undefined, AUTH_HEADER); + + expect(first.status).toBe(200); + expect(first.body.success).toBe(true); + expect(first.body.frameCount).toBe(2); + expect(first.body.eof).toBe(false); + expect(first.body.nextCursor).toBeTruthy(); + expect(first.body.frames.length).toBe(2); + + const lastFirstLogIndex = BigInt(first.body.frames[1].logIndex); + const second = await requestJson<{ + success: boolean; + frameCount: number; + frames: Array<{ logIndex: string }>; + cursor: string | null; + }>( + "GET", + 
`/api/replication/log?maxFrames=2&cursor=${encodeURIComponent(first.body.nextCursor!)}`, + undefined, + AUTH_HEADER, + ); + + expect(second.status).toBe(200); + expect(second.body.success).toBe(true); + expect(second.body.cursor).toBe(first.body.nextCursor); + expect(second.body.frameCount).toBeGreaterThan(0); + expect(BigInt(second.body.frames[0].logIndex) > lastFirstLogIndex).toBe(true); + }); + + test("respects maxBytes and returns one frame minimum", async () => { + await openPrimary(); + await appendCommits(3); + + const response = await requestJson<{ + success: boolean; + frameCount: number; + eof: boolean; + totalBytes: number; + nextCursor: string | null; + }>("GET", "/api/replication/log?maxBytes=1", undefined, AUTH_HEADER); + + expect(response.status).toBe(200); + expect(response.body.success).toBe(true); + expect(response.body.frameCount).toBe(1); + expect(response.body.totalBytes).toBeGreaterThan(0); + expect(response.body.eof).toBe(false); + expect(response.body.nextCursor).toBeTruthy(); + }); + + test("returns structured error on malformed cursor", async () => { + await openPrimary(); + await appendCommits(1); + + const response = await requestJson<{ success: boolean; error?: string }>( + "GET", + "/api/replication/log?cursor=bad-cursor", + undefined, + AUTH_HEADER, + ); + + expect(response.status).toBe(200); + expect(response.body.success).toBe(false); + expect(response.body.error).toBeTruthy(); + }); + + test("returns structured error on malformed 4-part cursor with non-numeric components", async () => { + await openPrimary(); + await appendCommits(2); + + const response = await requestJson<{ success: boolean; error?: string }>( + "GET", + "/api/replication/log?cursor=1:abc:def:ghi", + undefined, + AUTH_HEADER, + ); + + expect(response.status).toBe(200); + expect(response.body.success).toBe(false); + expect(response.body.error).toBeTruthy(); + }); + + test("returns structured error on cursor with too many components", async () => { + await 
openPrimary(); + await appendCommits(2); + + const response = await requestJson<{ success: boolean; error?: string }>( + "GET", + "/api/replication/log?cursor=1:2:3:4:5", + undefined, + AUTH_HEADER, + ); + + expect(response.status).toBe(200); + expect(response.body.success).toBe(false); + expect(response.body.error).toBeTruthy(); + }); + + test("accepts cursors with empty numeric components as zero (current behavior)", async () => { + await openPrimary(); + await appendCommits(2); + + const emptySegmentId = await requestJson<{ + success: boolean; + frameCount: number; + cursor: string | null; + nextCursor: string | null; + }>( + "GET", + "/api/replication/log?cursor=1::3:4", + undefined, + AUTH_HEADER, + ); + expect(emptySegmentId.status).toBe(200); + expect(emptySegmentId.body.success).toBe(true); + expect(emptySegmentId.body.cursor).toBe("1::3:4"); + expect(emptySegmentId.body.frameCount).toBe(0); + expect(emptySegmentId.body.nextCursor).toBe("1::3:4"); + + const emptyEpoch = await requestJson<{ + success: boolean; + frameCount: number; + cursor: string | null; + nextCursor: string | null; + }>( + "GET", + "/api/replication/log?cursor=:2", + undefined, + AUTH_HEADER, + ); + expect(emptyEpoch.status).toBe(200); + expect(emptyEpoch.body.success).toBe(true); + expect(emptyEpoch.body.cursor).toBe(":2"); + expect(emptyEpoch.body.frameCount).toBe(2); + expect(emptyEpoch.body.nextCursor).toBeTruthy(); + }); + + test("accepts 2-part cursor format epoch:logIndex", async () => { + await openPrimary(); + await appendCommits(5); + + const first = await requestJson<{ + success: boolean; + frameCount: number; + frames: Array<{ epoch: string; logIndex: string }>; + }>("GET", "/api/replication/log?maxFrames=2", undefined, AUTH_HEADER); + expect(first.status).toBe(200); + expect(first.body.success).toBe(true); + expect(first.body.frameCount).toBe(2); + + const cursor = `${first.body.frames[0].epoch}:${first.body.frames[0].logIndex}`; + const second = await requestJson<{ + 
success: boolean; + frameCount: number; + frames: Array<{ logIndex: string }>; + }>( + "GET", + `/api/replication/log?maxFrames=4&cursor=${encodeURIComponent(cursor)}`, + undefined, + AUTH_HEADER, + ); + + expect(second.status).toBe(200); + expect(second.body.success).toBe(true); + expect(second.body.frameCount).toBeGreaterThan(0); + expect(BigInt(second.body.frames[0].logIndex) > BigInt(first.body.frames[0].logIndex)).toBe( + true, + ); + }); + + test("4-part cursor resumes consistently at frame start vs frame end offset", async () => { + await openPrimary(); + await appendCommits(5); + + const firstPage = await requestJson<{ + success: boolean; + frameCount: number; + nextCursor: string | null; + frames: Array<{ + epoch: string; + segmentId: string; + segmentOffset: string; + logIndex: string; + payloadBase64: string; + }>; + }>( + "GET", + "/api/replication/log?maxFrames=1&includePayload=false", + undefined, + AUTH_HEADER, + ); + expect(firstPage.status).toBe(200); + expect(firstPage.body.success).toBe(true); + expect(firstPage.body.frameCount).toBe(1); + expect(firstPage.body.nextCursor).toBeTruthy(); + + const firstFrame = firstPage.body.frames[0]; + const startCursor = `${firstFrame.epoch}:${firstFrame.segmentId}:${firstFrame.segmentOffset}:${firstFrame.logIndex}`; + + const resumedFromStart = await requestJson<{ + success: boolean; + frameCount: number; + frames: Array<{ logIndex: string; payloadBase64: string }>; + }>( + "GET", + `/api/replication/log?maxFrames=3&includePayload=false&cursor=${encodeURIComponent(startCursor)}`, + undefined, + AUTH_HEADER, + ); + expect(resumedFromStart.status).toBe(200); + expect(resumedFromStart.body.success).toBe(true); + expect(resumedFromStart.body.frameCount).toBeGreaterThan(0); + expect( + BigInt(resumedFromStart.body.frames[0].logIndex) > BigInt(firstFrame.logIndex), + ).toBe(true); + + const resumedFromEnd = await requestJson<{ + success: boolean; + frameCount: number; + frames: Array<{ logIndex: string; 
payloadBase64: string }>; + }>( + "GET", + `/api/replication/log?maxFrames=3&includePayload=false&cursor=${encodeURIComponent(firstPage.body.nextCursor!)}`, + undefined, + AUTH_HEADER, + ); + expect(resumedFromEnd.status).toBe(200); + expect(resumedFromEnd.body.success).toBe(true); + expect(resumedFromEnd.body.frameCount).toBeGreaterThan(0); + + expect(resumedFromEnd.body.frames[0].logIndex).toBe( + resumedFromStart.body.frames[0].logIndex, + ); + expect(resumedFromStart.body.frames[0].payloadBase64).toBe(""); + expect(resumedFromEnd.body.frames[0].payloadBase64).toBe(""); + }); + + test("supports includePayload=false while preserving paging cursors", async () => { + await openPrimary(); + await appendCommits(4); + + const first = await requestJson<{ + success: boolean; + frameCount: number; + nextCursor: string | null; + frames: Array<{ payloadBase64: string; logIndex: string }>; + }>( + "GET", + "/api/replication/log?maxFrames=2&includePayload=false", + undefined, + AUTH_HEADER, + ); + + expect(first.status).toBe(200); + expect(first.body.success).toBe(true); + expect(first.body.frameCount).toBe(2); + expect(first.body.nextCursor).toBeTruthy(); + for (const frame of first.body.frames) { + expect(frame.payloadBase64).toBe(""); + } + + const lastFirstLogIndex = BigInt(first.body.frames[1].logIndex); + const second = await requestJson<{ + success: boolean; + frameCount: number; + frames: Array<{ payloadBase64: string; logIndex: string }>; + }>( + "GET", + `/api/replication/log?maxFrames=2&includePayload=false&cursor=${encodeURIComponent(first.body.nextCursor!)}`, + undefined, + AUTH_HEADER, + ); + + expect(second.status).toBe(200); + expect(second.body.success).toBe(true); + expect(second.body.frameCount).toBeGreaterThan(0); + for (const frame of second.body.frames) { + expect(frame.payloadBase64).toBe(""); + } + expect(BigInt(second.body.frames[0].logIndex) > lastFirstLogIndex).toBe(true); + }); + + test("includePayload=false still honors maxBytes paging and cursor 
resume", async () => { + await openPrimary(); + await appendCommits(4); + + const first = await requestJson<{ + success: boolean; + frameCount: number; + totalBytes: number; + nextCursor: string | null; + eof: boolean; + frames: Array<{ payloadBase64: string; logIndex: string }>; + }>( + "GET", + "/api/replication/log?includePayload=false&maxBytes=1", + undefined, + AUTH_HEADER, + ); + + expect(first.status).toBe(200); + expect(first.body.success).toBe(true); + expect(first.body.frameCount).toBe(1); + expect(first.body.totalBytes).toBeGreaterThan(0); + expect(first.body.eof).toBe(false); + expect(first.body.nextCursor).toBeTruthy(); + expect(first.body.frames[0].payloadBase64).toBe(""); + + const firstLogIndex = BigInt(first.body.frames[0].logIndex); + const second = await requestJson<{ + success: boolean; + frameCount: number; + totalBytes: number; + nextCursor: string | null; + eof: boolean; + frames: Array<{ payloadBase64: string; logIndex: string }>; + }>( + "GET", + `/api/replication/log?includePayload=false&maxBytes=1&cursor=${encodeURIComponent(first.body.nextCursor!)}`, + undefined, + AUTH_HEADER, + ); + + expect(second.status).toBe(200); + expect(second.body.success).toBe(true); + expect(second.body.frameCount).toBe(1); + expect(second.body.totalBytes).toBeGreaterThan(0); + expect(second.body.nextCursor).toBeTruthy(); + expect(second.body.frames[0].payloadBase64).toBe(""); + expect(BigInt(second.body.frames[0].logIndex) > firstLogIndex).toBe(true); + }); + + test("replication log uses sane defaults when query params are omitted", async () => { + await openPrimary(); + await appendCommits(3); + + const response = await requestJson<{ + success: boolean; + frameCount: number; + eof: boolean; + nextCursor: string | null; + frames: Array<{ payloadBase64: string }>; + }>("GET", "/api/replication/log", undefined, AUTH_HEADER); + + expect(response.status).toBe(200); + expect(response.body.success).toBe(true); + expect(response.body.frameCount).toBeGreaterThan(0); 
+ expect(response.body.frameCount).toBeLessThanOrEqual(256); + expect(response.body.eof).toBe(true); + expect(response.body.nextCursor).toBeTruthy(); + for (const frame of response.body.frames) { + expect(frame.payloadBase64.length).toBeGreaterThan(0); + } + }); + + test("replication log clamps out-of-range maxFrames/maxBytes query values", async () => { + await openPrimary(); + await appendCommits(5); + + const zeroFrames = await requestJson<{ + success: boolean; + frameCount: number; + eof: boolean; + frames: Array<{ payloadBase64: string }>; + }>( + "GET", + "/api/replication/log?includePayload=false&maxFrames=0&maxBytes=999999999", + undefined, + AUTH_HEADER, + ); + expect(zeroFrames.status).toBe(200); + expect(zeroFrames.body.success).toBe(true); + expect(zeroFrames.body.frameCount).toBe(1); + expect(zeroFrames.body.eof).toBe(false); + expect(zeroFrames.body.frames[0].payloadBase64).toBe(""); + + const negativeFrames = await requestJson<{ + success: boolean; + frameCount: number; + eof: boolean; + frames: Array<{ payloadBase64: string }>; + }>( + "GET", + "/api/replication/log?includePayload=false&maxFrames=-10&maxBytes=999999999", + undefined, + AUTH_HEADER, + ); + expect(negativeFrames.status).toBe(200); + expect(negativeFrames.body.success).toBe(true); + expect(negativeFrames.body.frameCount).toBe(1); + expect(negativeFrames.body.eof).toBe(false); + expect(negativeFrames.body.frames[0].payloadBase64).toBe(""); + + const negativeBytes = await requestJson<{ + success: boolean; + frameCount: number; + eof: boolean; + totalBytes: number; + frames: Array<{ payloadBase64: string }>; + }>( + "GET", + "/api/replication/log?includePayload=false&maxFrames=999999&maxBytes=-7", + undefined, + AUTH_HEADER, + ); + expect(negativeBytes.status).toBe(200); + expect(negativeBytes.body.success).toBe(true); + expect(negativeBytes.body.frameCount).toBe(1); + expect(negativeBytes.body.totalBytes).toBeGreaterThan(0); + expect(negativeBytes.body.eof).toBe(false); + 
expect(negativeBytes.body.frames[0].payloadBase64).toBe(""); + }); + + test("replication log falls back to defaults on invalid query values", async () => { + await openPrimary(); + await appendCommits(10); + + const response = await requestJson<{ + success: boolean; + frameCount: number; + eof: boolean; + nextCursor: string | null; + frames: Array<{ payloadBase64: string }>; + }>( + "GET", + "/api/replication/log?maxFrames=abc&maxBytes=nan&includePayload=maybe", + undefined, + AUTH_HEADER, + ); + + expect(response.status).toBe(200); + expect(response.body.success).toBe(true); + expect(response.body.frameCount).toBeGreaterThan(1); + expect(response.body.frameCount).toBeLessThanOrEqual(256); + expect(response.body.eof).toBe(true); + expect(response.body.nextCursor).toBeTruthy(); + for (const frame of response.body.frames) { + expect(frame.payloadBase64.length).toBeGreaterThan(0); + } + }); + + test("snapshot includeData=true returns consistent bytes/hash metadata", async () => { + await openPrimary(); + await appendCommits(3); + + const response = await requestJson<{ + success: boolean; + role?: string; + snapshot?: { + dbPath?: string; + byteLength?: number; + sha256?: string; + dataBase64?: string; + }; + }>("GET", "/api/replication/snapshot/latest?includeData=true", undefined, AUTH_HEADER); + + expect(response.status).toBe(200); + expect(response.body.success).toBe(true); + expect(response.body.role).toBe("primary"); + + const snapshot = response.body.snapshot; + expect(snapshot).toBeTruthy(); + expect(snapshot?.dbPath).toBeTruthy(); + expect(snapshot?.byteLength).toBeGreaterThan(0); + expect(snapshot?.sha256).toBeTruthy(); + expect(snapshot?.dataBase64).toBeTruthy(); + + const decoded = Buffer.from(snapshot!.dataBase64!, "base64"); + expect(decoded.byteLength).toBe(snapshot!.byteLength); + + const fileBytes = await readFile(snapshot!.dbPath!); + expect(fileBytes.byteLength).toBe(snapshot!.byteLength); + expect(Buffer.compare(decoded, fileBytes)).toBe(0); + + 
const computed = createHash("sha256").update(fileBytes).digest("hex"); + expect(computed).toBe(snapshot!.sha256); + }); + + test("snapshot includeData=false omits payload but keeps valid metadata", async () => { + await openPrimary(); + await appendCommits(2); + + const response = await requestJson<{ + success: boolean; + role?: string; + snapshot?: { + dbPath?: string; + byteLength?: number; + sha256?: string; + dataBase64?: string; + }; + }>("GET", "/api/replication/snapshot/latest?includeData=false", undefined, AUTH_HEADER); + + expect(response.status).toBe(200); + expect(response.body.success).toBe(true); + expect(response.body.role).toBe("primary"); + + const snapshot = response.body.snapshot; + expect(snapshot).toBeTruthy(); + expect(snapshot?.dbPath).toBeTruthy(); + expect(snapshot?.byteLength).toBeGreaterThan(0); + expect(snapshot?.sha256).toBeTruthy(); + expect(snapshot?.dataBase64).toBeUndefined(); + + const fileBytes = await readFile(snapshot!.dbPath!); + expect(fileBytes.byteLength).toBe(snapshot!.byteLength); + const computed = createHash("sha256").update(fileBytes).digest("hex"); + expect(computed).toBe(snapshot!.sha256); + }); + + test("enforces bearer token on protected endpoints", async () => { + await openPrimary(); + + const unauthorized = await requestJson<{ success: boolean; error?: string }>( + "GET", + "/api/replication/log", + ); + + expect(unauthorized.status).toBe(401); + expect(unauthorized.body.success).toBe(false); + expect(unauthorized.body.error).toContain("Unauthorized"); + + const authorized = await requestJson<{ success: boolean }>( + "GET", + "/api/replication/log", + undefined, + AUTH_HEADER, + ); + expect(authorized.status).toBe(200); + expect(authorized.body.success).toBe(true); + }); + + test("replication status remains readable without bearer token", async () => { + await openPrimary(); + await appendCommits(1); + + const publicStatus = await requestJson<{ + connected: boolean; + authEnabled: boolean; + role: string; + 
primary?: { headLogIndex?: number }; + }>("GET", "/api/replication/status"); + expect(publicStatus.status).toBe(200); + expect(publicStatus.body.connected).toBe(true); + expect(publicStatus.body.authEnabled).toBe(true); + expect(publicStatus.body.role).toBe("primary"); + expect((publicStatus.body.primary?.headLogIndex ?? 0) > 0).toBe(true); + + const adminBlocked = await requestJson<{ success: boolean; error?: string }>( + "GET", + "/api/replication/log", + ); + expect(adminBlocked.status).toBe(401); + expect(adminBlocked.body.success).toBe(false); + }); + + test("replication metrics endpoint exports Prometheus text when authorized", async () => { + await openPrimary(); + await appendCommits(3); + + const metrics = await requestText( + "GET", + "/api/replication/metrics", + undefined, + AUTH_HEADER, + ); + + expect(metrics.status).toBe(200); + expect(metrics.body).toContain("# HELP raydb_replication_enabled"); + expect(metrics.body).toContain("# TYPE raydb_replication_enabled gauge"); + expect(metrics.body).toContain('raydb_replication_enabled{role="primary"} 1'); + expect(metrics.body).toContain("raydb_replication_primary_head_log_index"); + expect(metrics.body).toContain("raydb_replication_primary_append_attempts_total"); + }); + + test("replication metrics endpoint requires bearer token", async () => { + await openPrimary(); + + const unauthorized = await requestText("GET", "/api/replication/metrics"); + expect(unauthorized.status).toBe(401); + expect(unauthorized.body).toContain("Unauthorized"); + }); + + test("supports mTLS-only admin auth mode", async () => { + await openPrimary(); + await appendCommits(1); + + await withReplicationAuthEnv( + { + REPLICATION_ADMIN_AUTH_MODE: "mtls", + REPLICATION_MTLS_HEADER: "x-client-cert", + REPLICATION_MTLS_SUBJECT_REGEX: "^CN=allowed", + }, + async () => { + const noMtls = await requestJson<{ success: boolean; error?: string }>( + "GET", + "/api/replication/log", + ); + expect(noMtls.status).toBe(401); + 
expect(noMtls.body.success).toBe(false); + + const badSubject = await requestJson<{ success: boolean; error?: string }>( + "GET", + "/api/replication/log", + undefined, + { "x-client-cert": "CN=denied-client" }, + ); + expect(badSubject.status).toBe(401); + expect(badSubject.body.success).toBe(false); + + const goodSubject = await requestJson<{ success: boolean }>( + "GET", + "/api/replication/log", + undefined, + { "x-client-cert": "CN=allowed-client,O=RayDB" }, + ); + expect(goodSubject.status).toBe(200); + expect(goodSubject.body.success).toBe(true); + }, + ); + }); + + test("supports native TLS mTLS auth mode without proxy header", async () => { + await openPrimary(); + await appendCommits(1); + + await withReplicationAuthEnv( + { + REPLICATION_ADMIN_AUTH_MODE: "mtls", + REPLICATION_MTLS_NATIVE_TLS: "true", + PLAYGROUND_TLS_REQUEST_CERT: "true", + PLAYGROUND_TLS_REJECT_UNAUTHORIZED: "true", + REPLICATION_MTLS_HEADER: null, + REPLICATION_MTLS_SUBJECT_REGEX: null, + }, + async () => { + const httpRequest = await requestJson<{ success: boolean; error?: string }>( + "GET", + "/api/replication/log", + ); + expect(httpRequest.status).toBe(401); + expect(httpRequest.body.success).toBe(false); + + const httpsRequest = await requestJson<{ success: boolean }>( + "GET", + "/api/replication/log", + undefined, + undefined, + "https://localhost", + ); + expect(httpsRequest.status).toBe(200); + expect(httpsRequest.body.success).toBe(true); + }, + ); + }); + + test("rejects invalid native TLS mTLS config", async () => { + await openPrimary(); + await appendCommits(1); + + await withReplicationAuthEnv( + { + REPLICATION_ADMIN_AUTH_MODE: "mtls", + REPLICATION_MTLS_NATIVE_TLS: "true", + PLAYGROUND_TLS_REQUEST_CERT: "false", + PLAYGROUND_TLS_REJECT_UNAUTHORIZED: "true", + }, + async () => { + const response = await requestJson<{ success: boolean; error?: string }>( + "GET", + "/api/replication/log", + ); + expect(response.status).toBe(500); + 
expect(response.body.success).toBe(false); + expect(response.body.error).toContain("REPLICATION_MTLS_NATIVE_TLS requires"); + }, + ); + }); + + test("supports token_and_mtls admin auth mode", async () => { + await openPrimary(); + await appendCommits(1); + + await withReplicationAuthEnv( + { + REPLICATION_ADMIN_TOKEN: "combo-token", + REPLICATION_ADMIN_AUTH_MODE: "token_and_mtls", + REPLICATION_MTLS_HEADER: "x-client-cert", + REPLICATION_MTLS_SUBJECT_REGEX: "^CN=combo$", + }, + async () => { + const tokenOnly = await requestJson<{ success: boolean; error?: string }>( + "GET", + "/api/replication/log", + undefined, + { Authorization: "Bearer combo-token" }, + ); + expect(tokenOnly.status).toBe(401); + expect(tokenOnly.body.success).toBe(false); + + const mtlsOnly = await requestJson<{ success: boolean; error?: string }>( + "GET", + "/api/replication/log", + undefined, + { "x-client-cert": "CN=combo" }, + ); + expect(mtlsOnly.status).toBe(401); + expect(mtlsOnly.body.success).toBe(false); + + const both = await requestJson<{ success: boolean }>( + "GET", + "/api/replication/log", + undefined, + { + Authorization: "Bearer combo-token", + "x-client-cert": "CN=combo", + }, + ); + expect(both.status).toBe(200); + expect(both.body.success).toBe(true); + }, + ); + }); + + test("rejects snapshot, pull, reseed, and promote without bearer token", async () => { + await openPrimary(); + + const snapshot = await requestJson<{ success: boolean; error?: string }>( + "GET", + "/api/replication/snapshot/latest", + ); + expect(snapshot.status).toBe(401); + expect(snapshot.body.success).toBe(false); + expect(snapshot.body.error).toContain("Unauthorized"); + + const pull = await requestJson<{ success: boolean; error?: string }>( + "POST", + "/api/replication/pull", + { maxFrames: 1 }, + ); + expect(pull.status).toBe(401); + expect(pull.body.success).toBe(false); + expect(pull.body.error).toContain("Unauthorized"); + + const reseed = await requestJson<{ success: boolean; error?: string 
}>( + "POST", + "/api/replication/reseed", + ); + expect(reseed.status).toBe(401); + expect(reseed.body.success).toBe(false); + expect(reseed.body.error).toContain("Unauthorized"); + + const promote = await requestJson<{ success: boolean; error?: string }>( + "POST", + "/api/replication/promote", + ); + expect(promote.status).toBe(401); + expect(promote.body.success).toBe(false); + expect(promote.body.error).toContain("Unauthorized"); + }); + + test("reseed on primary role returns structured error", async () => { + await openPrimary(); + + const reseed = await requestJson<{ success: boolean; error?: string }>( + "POST", + "/api/replication/reseed", + undefined, + AUTH_HEADER, + ); + expect(reseed.status).toBe(200); + expect(reseed.body.success).toBe(false); + expect(reseed.body.error).toContain("replica role"); + }); + + test("reseed is idempotent on healthy replica", async () => { + await openPrimary(); + await appendCommits(4); + + const replicaPath = join(tempDir, "replica-reseed-idempotent.kitedb"); + const openReplica = await requestJson<{ success: boolean; error?: string }>( + "POST", + "/api/db/open", + { + path: replicaPath, + options: { + replicationRole: "replica", + replicationSourceDbPath: dbPath, + }, + }, + ); + expect(openReplica.status).toBe(200); + expect(openReplica.body.success).toBe(true); + + const first = await requestJson<{ + success: boolean; + role: string; + replica?: { needsReseed?: boolean; lastError?: string | null; appliedLogIndex?: number }; + }>("POST", "/api/replication/reseed", undefined, AUTH_HEADER); + expect(first.status).toBe(200); + expect(first.body.success).toBe(true); + expect(first.body.role).toBe("replica"); + expect(first.body.replica?.needsReseed).toBe(false); + expect(first.body.replica?.lastError ?? null).toBeNull(); + expect((first.body.replica?.appliedLogIndex ?? 
0) > 0).toBe(true); + + const second = await requestJson<{ + success: boolean; + role: string; + replica?: { needsReseed?: boolean; lastError?: string | null; appliedLogIndex?: number }; + }>("POST", "/api/replication/reseed", undefined, AUTH_HEADER); + expect(second.status).toBe(200); + expect(second.body.success).toBe(true); + expect(second.body.role).toBe("replica"); + expect(second.body.replica?.needsReseed).toBe(false); + expect(second.body.replica?.lastError ?? null).toBeNull(); + expect(second.body.replica?.appliedLogIndex).toBe(first.body.replica?.appliedLogIndex); + }); + + test("reseed baseline allows later incremental pull after new primary commits", async () => { + await openPrimary(); + await appendCommits(4); + + const replicaPath = join(tempDir, "replica-reseed-continuity.kitedb"); + const openReplica = await requestJson<{ success: boolean; error?: string }>( + "POST", + "/api/db/open", + { + path: replicaPath, + options: { + replicationRole: "replica", + replicationSourceDbPath: dbPath, + }, + }, + ); + expect(openReplica.status).toBe(200); + expect(openReplica.body.success).toBe(true); + + const reseed = await requestJson<{ + success: boolean; + role: string; + replica?: { needsReseed?: boolean; lastError?: string | null; appliedLogIndex?: number }; + }>("POST", "/api/replication/reseed", undefined, AUTH_HEADER); + expect(reseed.status).toBe(200); + expect(reseed.body.success).toBe(true); + expect(reseed.body.role).toBe("replica"); + expect(reseed.body.replica?.needsReseed).toBe(false); + expect(reseed.body.replica?.lastError ?? null).toBeNull(); + const baselineApplied = reseed.body.replica?.appliedLogIndex ?? 
0; + expect(baselineApplied > 0).toBe(true); + + const reopenPrimary = await requestJson<{ success: boolean; error?: string }>( + "POST", + "/api/db/open", + { + path: dbPath, + options: { + replicationRole: "primary", + }, + }, + ); + expect(reopenPrimary.status).toBe(200); + expect(reopenPrimary.body.success).toBe(true); + await appendCommits(3); + + const reopenReplica = await requestJson<{ success: boolean; error?: string }>( + "POST", + "/api/db/open", + { + path: replicaPath, + options: { + replicationRole: "replica", + replicationSourceDbPath: dbPath, + }, + }, + ); + expect(reopenReplica.status).toBe(200); + expect(reopenReplica.body.success).toBe(true); + + const beforePull = await requestJson<{ + connected: boolean; + role: string; + replica?: { appliedLogIndex?: number; needsReseed?: boolean }; + }>("GET", "/api/replication/status"); + expect(beforePull.status).toBe(200); + expect(beforePull.body.role).toBe("replica"); + expect(beforePull.body.replica?.needsReseed).toBe(false); + expect(beforePull.body.replica?.appliedLogIndex).toBe(baselineApplied); + + const pull = await requestJson<{ + success: boolean; + appliedFrames?: number; + replica?: { appliedLogIndex?: number; needsReseed?: boolean }; + }>("POST", "/api/replication/pull", { maxFrames: 128 }, AUTH_HEADER); + expect(pull.status).toBe(200); + expect(pull.body.success).toBe(true); + expect((pull.body.appliedFrames ?? 0) > 0).toBe(true); + expect(pull.body.replica?.needsReseed).toBe(false); + expect((pull.body.replica?.appliedLogIndex ?? 
0) > baselineApplied).toBe(true); + }); + + test("replica pull advances appliedLogIndex after primary commits", async () => { + await openPrimary(); + await appendCommits(4); + + const replicaPath = join(tempDir, "replica.kitedb"); + const openReplica = await requestJson<{ success: boolean; error?: string }>( + "POST", + "/api/db/open", + { + path: replicaPath, + options: { + replicationRole: "replica", + replicationSourceDbPath: dbPath, + }, + }, + ); + expect(openReplica.status).toBe(200); + expect(openReplica.body.success).toBe(true); + + const before = await requestJson<{ + connected: boolean; + role: string; + replica?: { appliedLogIndex?: number }; + }>("GET", "/api/replication/status"); + expect(before.status).toBe(200); + expect(before.body.connected).toBe(true); + expect(before.body.role).toBe("replica"); + const beforeIndex = before.body.replica?.appliedLogIndex ?? 0; + + const pull = await requestJson<{ + success: boolean; + appliedFrames?: number; + replica?: { appliedLogIndex?: number }; + }>("POST", "/api/replication/pull", { maxFrames: 64 }, AUTH_HEADER); + expect(pull.status).toBe(200); + expect(pull.body.success).toBe(true); + expect((pull.body.appliedFrames ?? 0) > 0).toBe(true); + + const after = await requestJson<{ + connected: boolean; + role: string; + replica?: { appliedLogIndex?: number }; + }>("GET", "/api/replication/status"); + expect(after.status).toBe(200); + expect(after.body.connected).toBe(true); + expect(after.body.role).toBe("replica"); + const afterIndex = after.body.replica?.appliedLogIndex ?? 
0; + expect(afterIndex > beforeIndex).toBe(true); + }); + + test("promote increments epoch and replica catches up from promoted primary", async () => { + await openPrimary(); + await appendCommits(2); + + const promote = await requestJson<{ + success: boolean; + epoch?: number; + role?: string; + primary?: { epoch?: number }; + }>("POST", "/api/replication/promote", undefined, AUTH_HEADER); + expect(promote.status).toBe(200); + expect(promote.body.success).toBe(true); + expect(promote.body.role).toBe("primary"); + expect(promote.body.epoch).toBe(2); + expect(promote.body.primary?.epoch).toBe(2); + + await appendCommits(3); + + const replicaPath = join(tempDir, "replica-promoted.kitedb"); + const openReplica = await requestJson<{ success: boolean }>("POST", "/api/db/open", { + path: replicaPath, + options: { + replicationRole: "replica", + replicationSourceDbPath: dbPath, + }, + }); + expect(openReplica.status).toBe(200); + expect(openReplica.body.success).toBe(true); + + const pull = await requestJson<{ + success: boolean; + appliedFrames?: number; + replica?: { appliedEpoch?: number; appliedLogIndex?: number }; + }>("POST", "/api/replication/pull", { maxFrames: 128 }, AUTH_HEADER); + expect(pull.status).toBe(200); + expect(pull.body.success).toBe(true); + expect((pull.body.appliedFrames ?? 0) > 0).toBe(true); + expect((pull.body.replica?.appliedEpoch ?? 0) >= 2).toBe(true); + expect((pull.body.replica?.appliedLogIndex ?? 
0) > 0).toBe(true); + }); + + test("reseed clears needsReseed after missing-segment failure", async () => { + await closeDatabase(); + tempDir = await mkdtemp(join(tmpdir(), "playground-repl-test-")); + dbPath = join(tempDir, "primary-needs-reseed.kitedb"); + const openPrimaryWithSmallSegments = await requestJson<{ success: boolean }>( + "POST", + "/api/db/open", + { + path: dbPath, + options: { + replicationRole: "primary", + replicationSegmentMaxBytes: 1, + }, + }, + ); + expect(openPrimaryWithSmallSegments.status).toBe(200); + expect(openPrimaryWithSmallSegments.body.success).toBe(true); + + await appendCommits(6); + + const primaryStatus = await requestJson<{ + connected: boolean; + role: string; + primary?: { sidecarPath?: string; headLogIndex?: number }; + }>("GET", "/api/replication/status"); + expect(primaryStatus.status).toBe(200); + expect(primaryStatus.body.connected).toBe(true); + expect(primaryStatus.body.role).toBe("primary"); + const sidecarPath = primaryStatus.body.primary?.sidecarPath; + const headLogIndex = primaryStatus.body.primary?.headLogIndex ?? 0; + expect(sidecarPath).toBeTruthy(); + expect(headLogIndex > 0).toBe(true); + + const replicaPath = join(tempDir, "replica-needs-reseed.kitedb"); + const openReplica = await requestJson<{ success: boolean }>("POST", "/api/db/open", { + path: replicaPath, + options: { + replicationRole: "replica", + replicationSourceDbPath: dbPath, + }, + }); + expect(openReplica.status).toBe(200); + expect(openReplica.body.success).toBe(true); + + const initialPull = await requestJson<{ success: boolean; appliedFrames?: number }>( + "POST", + "/api/replication/pull", + { maxFrames: 1 }, + AUTH_HEADER, + ); + expect(initialPull.status).toBe(200); + expect(initialPull.body.success).toBe(true); + expect((initialPull.body.appliedFrames ?? 
0) > 0).toBe(true); + + const replicaStatusBefore = await requestJson<{ + connected: boolean; + role: string; + replica?: { appliedLogIndex?: number }; + }>("GET", "/api/replication/status"); + expect(replicaStatusBefore.status).toBe(200); + expect(replicaStatusBefore.body.role).toBe("replica"); + const appliedIndex = replicaStatusBefore.body.replica?.appliedLogIndex ?? 0; + expect(headLogIndex > appliedIndex).toBe(true); + + const manifestPath = join(sidecarPath!, "manifest.json"); + const envelope = JSON.parse( + await readFile(manifestPath, "utf8"), + ) as ManifestEnvelope; + + const expectedNext = appliedIndex + 1; + const gapSegment = envelope.manifest.segments.find( + (segment) => + segment.start_log_index <= expectedNext && + segment.end_log_index >= expectedNext, + ); + expect(gapSegment).toBeTruthy(); + const segmentPath = join( + sidecarPath!, + `segment-${String(gapSegment!.id).padStart(20, "0")}.rlog`, + ); + await rm(segmentPath, { force: true }); + + const pullAfterTamper = await requestJson<{ success: boolean; error?: string }>( + "POST", + "/api/replication/pull", + { maxFrames: 64 }, + AUTH_HEADER, + ); + expect(pullAfterTamper.status).toBe(200); + expect(pullAfterTamper.body.success).toBe(false); + expect(pullAfterTamper.body.error).toContain("needs reseed"); + + const replicaStatusAfter = await requestJson<{ + connected: boolean; + role: string; + replica?: { needsReseed?: boolean; lastError?: string }; + }>("GET", "/api/replication/status"); + expect(replicaStatusAfter.status).toBe(200); + expect(replicaStatusAfter.body.role).toBe("replica"); + expect(replicaStatusAfter.body.replica?.needsReseed).toBe(true); + expect(replicaStatusAfter.body.replica?.lastError).toContain("needs reseed"); + + const reseed = await requestJson<{ + success: boolean; + role: string; + replica?: { needsReseed?: boolean; lastError?: string | null }; + }>("POST", "/api/replication/reseed", undefined, AUTH_HEADER); + expect(reseed.status).toBe(200); + 
expect(reseed.body.success).toBe(true); + expect(reseed.body.role).toBe("replica"); + expect(reseed.body.replica?.needsReseed).toBe(false); + expect(reseed.body.replica?.lastError ?? null).toBeNull(); + + const replicaStatusAfterReseed = await requestJson<{ + connected: boolean; + role: string; + replica?: { needsReseed?: boolean; lastError?: string | null }; + }>("GET", "/api/replication/status"); + expect(replicaStatusAfterReseed.status).toBe(200); + expect(replicaStatusAfterReseed.body.role).toBe("replica"); + expect(replicaStatusAfterReseed.body.replica?.needsReseed).toBe(false); + expect(replicaStatusAfterReseed.body.replica?.lastError ?? null).toBeNull(); + + const pullAfterReseed = await requestJson<{ success: boolean; appliedFrames?: number }>( + "POST", + "/api/replication/pull", + { maxFrames: 64 }, + AUTH_HEADER, + ); + expect(pullAfterReseed.status).toBe(200); + expect(pullAfterReseed.body.success).toBe(true); + }); +}); diff --git a/playground/src/api/routes.ts b/playground/src/api/routes.ts index d42358b..7007490 100644 --- a/playground/src/api/routes.ts +++ b/playground/src/api/routes.ts @@ -5,10 +5,13 @@ */ import { Elysia, t } from "elysia"; -import { getSnapshot } from "../../../src/ray/graph-db/snapshot-helper.ts"; +import { createHash } from "node:crypto"; +import { join } from "node:path"; import { getDb, + getDbPath, getStatus, + type PlaygroundOpenOptions, openDatabase, openFromBuffer, createDemo, @@ -29,6 +32,10 @@ import { const MAX_NODES = 1000; const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB +const REPLICATION_PULL_MAX_FRAMES_DEFAULT = 256; +const REPLICATION_PULL_MAX_FRAMES_LIMIT = 10_000; +const REPLICATION_LOG_MAX_BYTES_DEFAULT = 1024 * 1024; +const REPLICATION_LOG_MAX_BYTES_LIMIT = 32 * 1024 * 1024; // ============================================================================ // Types @@ -48,6 +55,64 @@ interface VisEdge { type: string; } +interface RawReplicationStatus { + role?: string; + epoch?: number; + headLogIndex?: number; + 
retainedFloor?: number; + replicaLags?: Array<{ + replicaId: string; + epoch: number; + appliedLogIndex: number; + }>; + sidecarPath?: string; + lastToken?: string | null; + appendAttempts?: number; + appendFailures?: number; + appendSuccesses?: number; +} + +interface RawReplicaStatus { + role?: string; + appliedEpoch?: number; + appliedLogIndex?: number; + needsReseed?: boolean; + lastError?: string | null; +} + +interface ParsedReplicationCursor { + epoch: bigint; + segmentId: bigint; + segmentOffset: bigint; + logIndex: bigint; +} + +interface ReplicationFrameResponse { + epoch: string; + logIndex: string; + segmentId: string; + segmentOffset: string; + payloadBase64: string; + bytes: number; +} + +type ReplicationAdminAuthMode = + | "none" + | "token" + | "mtls" + | "token_or_mtls" + | "token_and_mtls"; + +interface ReplicationAdminConfig { + mode: ReplicationAdminAuthMode; + authEnabled: boolean; + token: string | null; + mtlsHeader: string; + mtlsSubjectRegex: RegExp | null; + mtlsNativeTlsEnabled: boolean; + invalidConfigError: string | null; +} + // ============================================================================ // Color scheme for node types // ============================================================================ @@ -83,6 +148,668 @@ function getEdgeDef(type: string) { } } +function getRawDb(): Record | null { + const db = getDb() as unknown as (Record & { $raw?: Record }) | null; + if (!db) { + return null; + } + return db.$raw ?? 
db; +} + +function callRawMethod( + raw: Record, + names: Array, + ...args: Array +): T { + for (const name of names) { + const candidate = raw[name]; + if (typeof candidate === "function") { + return (candidate as (...values: Array) => T).call(raw, ...args); + } + } + + throw new Error(`Replication method unavailable (${names.join(" | ")})`); +} + +function parseBooleanEnv(raw: string | undefined, defaultValue: boolean): boolean | null { + if (raw === undefined) { + return defaultValue; + } + + const normalized = raw.trim().toLowerCase(); + if (normalized === "") { + return defaultValue; + } + + if (normalized === "1" || normalized === "true" || normalized === "yes" || normalized === "on") { + return true; + } + if (normalized === "0" || normalized === "false" || normalized === "no" || normalized === "off") { + return false; + } + return null; +} + +function resolveReplicationAdminConfig(): ReplicationAdminConfig { + const tokenRaw = process.env.REPLICATION_ADMIN_TOKEN?.trim(); + const token = tokenRaw && tokenRaw.length > 0 ? tokenRaw : null; + + const modeRaw = process.env.REPLICATION_ADMIN_AUTH_MODE?.trim().toLowerCase(); + const mode: ReplicationAdminAuthMode = (() => { + if (!modeRaw || modeRaw === "") { + return token ? "token" : "none"; + } + + switch (modeRaw) { + case "none": + case "token": + case "mtls": + case "token_or_mtls": + case "token_and_mtls": + return modeRaw; + default: + return "none"; + } + })(); + + if (modeRaw && mode === "none" && modeRaw !== "none") { + return { + mode, + authEnabled: true, + token, + mtlsHeader: "x-forwarded-client-cert", + mtlsSubjectRegex: null, + mtlsNativeTlsEnabled: false, + invalidConfigError: + "Invalid REPLICATION_ADMIN_AUTH_MODE; expected none|token|mtls|token_or_mtls|token_and_mtls", + }; + } + + const mtlsHeaderRaw = process.env.REPLICATION_MTLS_HEADER?.trim().toLowerCase(); + const mtlsHeader = mtlsHeaderRaw && mtlsHeaderRaw.length > 0 + ? 
mtlsHeaderRaw + : "x-forwarded-client-cert"; + + const nativeTlsMode = parseBooleanEnv(process.env.REPLICATION_MTLS_NATIVE_TLS, false); + if (nativeTlsMode === null) { + return { + mode, + authEnabled: true, + token, + mtlsHeader, + mtlsSubjectRegex: null, + mtlsNativeTlsEnabled: false, + invalidConfigError: "Invalid REPLICATION_MTLS_NATIVE_TLS (expected boolean)", + }; + } + + if (nativeTlsMode) { + const tlsRequestCert = parseBooleanEnv(process.env.PLAYGROUND_TLS_REQUEST_CERT, false); + if (tlsRequestCert === null) { + return { + mode, + authEnabled: true, + token, + mtlsHeader, + mtlsSubjectRegex: null, + mtlsNativeTlsEnabled: false, + invalidConfigError: "Invalid PLAYGROUND_TLS_REQUEST_CERT (expected boolean)", + }; + } + + const tlsRejectUnauthorized = parseBooleanEnv(process.env.PLAYGROUND_TLS_REJECT_UNAUTHORIZED, true); + if (tlsRejectUnauthorized === null) { + return { + mode, + authEnabled: true, + token, + mtlsHeader, + mtlsSubjectRegex: null, + mtlsNativeTlsEnabled: false, + invalidConfigError: "Invalid PLAYGROUND_TLS_REJECT_UNAUTHORIZED (expected boolean)", + }; + } + + if (!tlsRequestCert || !tlsRejectUnauthorized) { + return { + mode, + authEnabled: true, + token, + mtlsHeader, + mtlsSubjectRegex: null, + mtlsNativeTlsEnabled: false, + invalidConfigError: + "REPLICATION_MTLS_NATIVE_TLS requires PLAYGROUND_TLS_REQUEST_CERT=true and PLAYGROUND_TLS_REJECT_UNAUTHORIZED=true", + }; + } + } + + const regexRaw = process.env.REPLICATION_MTLS_SUBJECT_REGEX?.trim(); + if (regexRaw && regexRaw.length > 0) { + try { + return { + mode, + authEnabled: mode !== "none", + token, + mtlsHeader, + mtlsSubjectRegex: new RegExp(regexRaw), + mtlsNativeTlsEnabled: nativeTlsMode, + invalidConfigError: null, + }; + } catch { + return { + mode, + authEnabled: true, + token, + mtlsHeader, + mtlsSubjectRegex: null, + mtlsNativeTlsEnabled: nativeTlsMode, + invalidConfigError: "Invalid REPLICATION_MTLS_SUBJECT_REGEX", + }; + } + } + + return { + mode, + authEnabled: mode !== 
"none", + token, + mtlsHeader, + mtlsSubjectRegex: null, + mtlsNativeTlsEnabled: nativeTlsMode, + invalidConfigError: null, + }; +} + +function matchesMtlsRequest(request: Request, config: ReplicationAdminConfig): boolean { + const headerValue = request.headers.get(config.mtlsHeader); + if (headerValue && headerValue.trim() !== "") { + if (!config.mtlsSubjectRegex) { + return true; + } + return config.mtlsSubjectRegex.test(headerValue); + } + + if (!config.mtlsNativeTlsEnabled || config.mtlsSubjectRegex) { + return false; + } + + try { + return new URL(request.url).protocol === "https:"; + } catch { + return false; + } +} + +function requireReplicationAdmin( + request: Request, + set: { status?: number }, +): { ok: true } | { ok: false; error: string } { + const config = resolveReplicationAdminConfig(); + if (config.invalidConfigError) { + set.status = 500; + return { ok: false, error: config.invalidConfigError }; + } + + if (config.mode === "none") { + return { ok: true }; + } + + const authHeader = request.headers.get("authorization"); + const tokenOk = config.token ? 
authHeader === `Bearer ${config.token}` : false; + const mtlsOk = matchesMtlsRequest(request, config); + + const authorized = (() => { + switch (config.mode) { + case "token": + return tokenOk; + case "mtls": + return mtlsOk; + case "token_or_mtls": + return tokenOk || mtlsOk; + case "token_and_mtls": + return tokenOk && mtlsOk; + case "none": + default: + return true; + } + })(); + + if (authorized) { + return { ok: true }; + } + + set.status = 401; + return { + ok: false, + error: `Unauthorized: replication admin auth mode '${config.mode}' not satisfied`, + }; +} + +function resolveReplicationStatus( + raw: Record, +): { + role: "primary" | "replica" | "disabled"; + primary: RawReplicationStatus | null; + replica: RawReplicaStatus | null; +} { + const primary = callRawMethod( + raw, + ["primaryReplicationStatus", "primary_replication_status"], + ); + const replica = callRawMethod( + raw, + ["replicaReplicationStatus", "replica_replication_status"], + ); + + const role = primary + ? "primary" + : replica + ? 
"replica" + : "disabled"; + + return { role, primary, replica }; +} + +function getSnapshot(rawDb: Record): Record | null { + const direct = rawDb._snapshot; + if (direct && typeof direct === "object") { + return direct as Record; + } + + const cached = rawDb._snapshotCache; + if (cached && typeof cached === "object") { + return cached as Record; + } + + return null; +} + +function parsePositiveInt( + value: unknown, + fallback: number, + min: number, + max: number, +): number { + if (value === undefined || value === null || value === "") { + return fallback; + } + + const parsed = Number(value); + if (!Number.isFinite(parsed)) { + return fallback; + } + + return Math.min(Math.max(Math.floor(parsed), min), max); +} + +function parseBoolean(value: unknown, fallback: boolean): boolean { + if (value === undefined || value === null || value === "") { + return fallback; + } + + if (typeof value === "boolean") { + return value; + } + + const text = String(value).toLowerCase().trim(); + if (text === "1" || text === "true" || text === "yes") { + return true; + } + if (text === "0" || text === "false" || text === "no") { + return false; + } + + return fallback; +} + +function parseReplicationCursor(raw: unknown): ParsedReplicationCursor | null { + if (typeof raw !== "string" || raw.trim() === "") { + return null; + } + + const token = raw.trim(); + const parts = token.split(":"); + if (parts.length === 2) { + const epoch = BigInt(parts[0]); + const logIndex = BigInt(parts[1]); + return { + epoch, + segmentId: 0n, + segmentOffset: 0n, + logIndex, + }; + } + + if (parts.length === 4) { + return { + epoch: BigInt(parts[0]), + segmentId: BigInt(parts[1]), + segmentOffset: BigInt(parts[2]), + logIndex: BigInt(parts[3]), + }; + } + + throw new Error( + "invalid cursor format; expected 'epoch:logIndex' or 'epoch:segmentId:segmentOffset:logIndex'", + ); +} + +function cursorAfterFrame( + cursor: ParsedReplicationCursor | null, + epoch: bigint, + segmentId: bigint, + segmentOffset: 
bigint, + logIndex: bigint, +): boolean { + if (!cursor) { + return true; + } + + if (epoch > cursor.epoch) { + return true; + } + if (epoch < cursor.epoch) { + return false; + } + + if (logIndex > cursor.logIndex) { + return true; + } + if (logIndex < cursor.logIndex) { + return false; + } + + if (cursor.segmentId === 0n) { + return false; + } + if (segmentId > cursor.segmentId) { + return true; + } + if (segmentId < cursor.segmentId) { + return false; + } + + return segmentOffset > cursor.segmentOffset; +} + +function formatSegmentFileName(id: bigint): string { + return `segment-${id.toString().padStart(20, "0")}.rlog`; +} + +async function readFileBytes(path: string): Promise { + const arrayBuffer = await Bun.file(path).arrayBuffer(); + return new Uint8Array(arrayBuffer); +} + +async function readManifestEnvelope(sidecarPath: string): Promise<{ + version: number; + payload_crc32: number; + manifest: { + epoch: number; + head_log_index: number; + retained_floor: number; + active_segment_id: number; + segments: Array<{ + id: number; + start_log_index: number; + end_log_index: number; + size_bytes: number; + }>; + }; +}> { + const manifestPath = join(sidecarPath, "manifest.json"); + const text = await Bun.file(manifestPath).text(); + return JSON.parse(text); +} + +function escapePrometheusLabelValue(value: string): string { + return value + .replaceAll("\\", "\\\\") + .replaceAll("\"", "\\\"") + .replaceAll("\n", "\\n"); +} + +function formatPrometheusLabels(labels: Record): string { + const entries = Object.entries(labels); + if (entries.length === 0) { + return ""; + } + const rendered = entries.map( + ([key, value]) => `${key}="${escapePrometheusLabelValue(String(value))}"`, + ); + return `{${rendered.join(",")}}`; +} + +function toMetricNumber(value: unknown, fallback = 0): number { + const parsed = Number(value); + if (!Number.isFinite(parsed)) { + return fallback; + } + return parsed; +} + +function pushPrometheusMetricHelp( + lines: Array, + metricName: 
string, + metricType: "gauge" | "counter", + helpText: string, +): void { + lines.push(`# HELP ${metricName} ${helpText}`); + lines.push(`# TYPE ${metricName} ${metricType}`); +} + +function pushPrometheusMetricSample( + lines: Array, + metricName: string, + value: number, + labels: Record = {}, +): void { + lines.push(`${metricName}${formatPrometheusLabels(labels)} ${value}`); +} + +function renderReplicationPrometheusMetrics( + resolved: { + role: "primary" | "replica" | "disabled"; + primary: RawReplicationStatus | null; + replica: RawReplicaStatus | null; + }, + authEnabled: boolean, +): string { + const lines: Array = []; + + pushPrometheusMetricHelp( + lines, + "raydb_replication_enabled", + "gauge", + "Whether replication is enabled for the connected database (1 enabled, 0 disabled).", + ); + pushPrometheusMetricSample(lines, "raydb_replication_enabled", resolved.role === "disabled" ? 0 : 1, { + role: resolved.role, + }); + + pushPrometheusMetricHelp( + lines, + "raydb_replication_auth_enabled", + "gauge", + "Whether replication admin token auth is enabled for admin endpoints.", + ); + pushPrometheusMetricSample(lines, "raydb_replication_auth_enabled", authEnabled ? 1 : 0); + + if (resolved.primary) { + const epoch = toMetricNumber(resolved.primary.epoch, 0); + const headLogIndex = toMetricNumber(resolved.primary.headLogIndex, 0); + const retainedFloor = toMetricNumber(resolved.primary.retainedFloor, 0); + const replicaLags = resolved.primary.replicaLags ?? 
[]; + + let staleReplicaCount = 0; + let maxReplicaLag = 0; + + pushPrometheusMetricHelp( + lines, + "raydb_replication_primary_epoch", + "gauge", + "Primary replication epoch.", + ); + pushPrometheusMetricSample(lines, "raydb_replication_primary_epoch", epoch); + + pushPrometheusMetricHelp( + lines, + "raydb_replication_primary_head_log_index", + "gauge", + "Primary replication head log index.", + ); + pushPrometheusMetricSample(lines, "raydb_replication_primary_head_log_index", headLogIndex); + + pushPrometheusMetricHelp( + lines, + "raydb_replication_primary_retained_floor", + "gauge", + "Primary replication retained floor log index.", + ); + pushPrometheusMetricSample(lines, "raydb_replication_primary_retained_floor", retainedFloor); + + pushPrometheusMetricHelp( + lines, + "raydb_replication_primary_replica_count", + "gauge", + "Number of replicas reporting progress to the primary.", + ); + pushPrometheusMetricSample(lines, "raydb_replication_primary_replica_count", replicaLags.length); + + pushPrometheusMetricHelp( + lines, + "raydb_replication_primary_replica_lag", + "gauge", + "Replica lag in frames relative to primary head index.", + ); + for (const lag of replicaLags) { + const replicaEpoch = toMetricNumber(lag.epoch, 0); + const appliedLogIndex = toMetricNumber(lag.appliedLogIndex, 0); + const lagFrames = replicaEpoch === epoch + ? 
Math.max(0, headLogIndex - appliedLogIndex) + : Math.max(0, headLogIndex); + if (replicaEpoch !== epoch) { + staleReplicaCount += 1; + } + maxReplicaLag = Math.max(maxReplicaLag, lagFrames); + pushPrometheusMetricSample( + lines, + "raydb_replication_primary_replica_lag", + lagFrames, + { + replica_id: lag.replicaId, + replica_epoch: replicaEpoch, + }, + ); + } + + pushPrometheusMetricHelp( + lines, + "raydb_replication_primary_stale_epoch_replica_count", + "gauge", + "Count of replicas reporting progress from a stale epoch.", + ); + pushPrometheusMetricSample( + lines, + "raydb_replication_primary_stale_epoch_replica_count", + staleReplicaCount, + ); + + pushPrometheusMetricHelp( + lines, + "raydb_replication_primary_max_replica_lag", + "gauge", + "Maximum replica lag in frames among replicas reporting progress.", + ); + pushPrometheusMetricSample(lines, "raydb_replication_primary_max_replica_lag", maxReplicaLag); + + pushPrometheusMetricHelp( + lines, + "raydb_replication_primary_append_attempts_total", + "counter", + "Total replication append attempts on primary commit path.", + ); + pushPrometheusMetricSample( + lines, + "raydb_replication_primary_append_attempts_total", + toMetricNumber(resolved.primary.appendAttempts, 0), + ); + + pushPrometheusMetricHelp( + lines, + "raydb_replication_primary_append_failures_total", + "counter", + "Total replication append failures on primary commit path.", + ); + pushPrometheusMetricSample( + lines, + "raydb_replication_primary_append_failures_total", + toMetricNumber(resolved.primary.appendFailures, 0), + ); + + pushPrometheusMetricHelp( + lines, + "raydb_replication_primary_append_successes_total", + "counter", + "Total replication append successes on primary commit path.", + ); + pushPrometheusMetricSample( + lines, + "raydb_replication_primary_append_successes_total", + toMetricNumber(resolved.primary.appendSuccesses, 0), + ); + } + + if (resolved.replica) { + pushPrometheusMetricHelp( + lines, + 
"raydb_replication_replica_applied_epoch", + "gauge", + "Replica applied epoch.", + ); + pushPrometheusMetricSample( + lines, + "raydb_replication_replica_applied_epoch", + toMetricNumber(resolved.replica.appliedEpoch, 0), + ); + + pushPrometheusMetricHelp( + lines, + "raydb_replication_replica_applied_log_index", + "gauge", + "Replica applied log index.", + ); + pushPrometheusMetricSample( + lines, + "raydb_replication_replica_applied_log_index", + toMetricNumber(resolved.replica.appliedLogIndex, 0), + ); + + pushPrometheusMetricHelp( + lines, + "raydb_replication_replica_needs_reseed", + "gauge", + "Whether replica currently requires reseed (1 yes, 0 no).", + ); + pushPrometheusMetricSample( + lines, + "raydb_replication_replica_needs_reseed", + resolved.replica.needsReseed ? 1 : 0, + ); + + pushPrometheusMetricHelp( + lines, + "raydb_replication_replica_last_error_present", + "gauge", + "Whether replica has a non-empty last_error value (1 yes, 0 no).", + ); + const hasError = resolved.replica.lastError ? 
1 : 0; + pushPrometheusMetricSample(lines, "raydb_replication_replica_last_error_present", hasError); + } + + return `${lines.join("\n")}\n`; +} + // ============================================================================ // API Routes // ============================================================================ @@ -95,17 +822,519 @@ export const apiRoutes = new Elysia({ prefix: "/api" }) return await getStatus(); }) + // -------------------------------------------------------------------------- + // Replication (status / pull / promote) + // -------------------------------------------------------------------------- + .get("/replication/status", async () => { + const raw = getRawDb(); + if (!raw) { + return { + connected: false, + error: "No database connected", + }; + } + + try { + const resolved = resolveReplicationStatus(raw); + return { + connected: true, + authEnabled: resolveReplicationAdminConfig().authEnabled, + role: resolved.role, + primary: resolved.primary, + replica: resolved.replica, + }; + } catch (error) { + return { + connected: true, + error: + error instanceof Error + ? error.message + : "Failed to query replication status", + }; + } + }) + + .get("/replication/metrics", async ({ request, set }) => { + const auth = requireReplicationAdmin(request, set); + if (!auth.ok) { + return new Response(auth.error, { + status: set.status ?? 
401, + headers: { "Content-Type": "text/plain; charset=utf-8" }, + }); + } + + const raw = getRawDb(); + if (!raw) { + return new Response("No database connected", { + status: 503, + headers: { "Content-Type": "text/plain; charset=utf-8" }, + }); + } + + try { + const resolved = resolveReplicationStatus(raw); + const text = renderReplicationPrometheusMetrics( + resolved, + resolveReplicationAdminConfig().authEnabled, + ); + return new Response(text, { + headers: { + "Content-Type": "text/plain; version=0.0.4; charset=utf-8", + "Cache-Control": "no-store", + }, + }); + } catch (error) { + return new Response( + error instanceof Error ? error.message : "Failed to render replication metrics", + { + status: 500, + headers: { "Content-Type": "text/plain; charset=utf-8" }, + }, + ); + } + }) + + .get("/replication/snapshot/latest", async ({ query, request, set }) => { + const auth = requireReplicationAdmin(request, set); + if (!auth.ok) { + return { success: false, error: auth.error }; + } + + const raw = getRawDb(); + if (!raw) { + return { success: false, error: "No database connected" }; + } + + try { + const resolved = resolveReplicationStatus(raw); + if (resolved.role !== "primary" || !resolved.primary) { + return { + success: false, + error: "Replication snapshot endpoint requires primary role", + }; + } + + const dbPath = getDbPath(); + if (!dbPath) { + return { success: false, error: "Database path unavailable" }; + } + + const includeData = parseBoolean((query as Record).includeData, false); + const bytes = await readFileBytes(dbPath); + const sha256 = createHash("sha256").update(bytes).digest("hex"); + + return { + success: true, + role: resolved.role, + epoch: resolved.primary.epoch ?? null, + headLogIndex: resolved.primary.headLogIndex ?? null, + snapshot: { + format: "single-file-db-copy", + dbPath, + byteLength: bytes.byteLength, + sha256, + generatedAt: new Date().toISOString(), + dataBase64: includeData ? 
Buffer.from(bytes).toString("base64") : undefined, + }, + }; + } catch (error) { + return { + success: false, + error: + error instanceof Error + ? error.message + : "Failed to prepare replication snapshot", + }; + } + }) + + .get("/replication/log", async ({ query, request, set }) => { + const auth = requireReplicationAdmin(request, set); + if (!auth.ok) { + return { success: false, error: auth.error }; + } + + const raw = getRawDb(); + if (!raw) { + return { success: false, error: "No database connected" }; + } + + try { + const resolved = resolveReplicationStatus(raw); + if (resolved.role !== "primary" || !resolved.primary?.sidecarPath) { + return { + success: false, + error: "Replication log endpoint requires primary role with sidecar", + }; + } + + const queryObject = query as Record; + const maxBytes = parsePositiveInt( + queryObject.maxBytes, + REPLICATION_LOG_MAX_BYTES_DEFAULT, + 1, + REPLICATION_LOG_MAX_BYTES_LIMIT, + ); + const maxFrames = parsePositiveInt( + queryObject.maxFrames, + REPLICATION_PULL_MAX_FRAMES_DEFAULT, + 1, + REPLICATION_PULL_MAX_FRAMES_LIMIT, + ); + const includePayload = parseBoolean(queryObject.includePayload, true); + const cursor = parseReplicationCursor(queryObject.cursor); + + const envelope = await readManifestEnvelope(resolved.primary.sidecarPath); + const manifest = envelope.manifest; + const segments = [...manifest.segments].sort((left, right) => left.id - right.id); + + const frames: Array = []; + let totalBytes = 0; + let nextCursor = typeof queryObject.cursor === "string" ? 
queryObject.cursor : null; + let limited = false; + + outer: for (const segment of segments) { + const segmentId = BigInt(segment.id); + const segmentPath = join( + resolved.primary.sidecarPath, + formatSegmentFileName(segmentId), + ); + + const segmentBytes = await readFileBytes(segmentPath); + const view = new DataView( + segmentBytes.buffer, + segmentBytes.byteOffset, + segmentBytes.byteLength, + ); + + let offset = 0; + while (offset + 32 <= segmentBytes.byteLength) { + const magic = view.getUint32(offset, true); + if (magic !== 0x474f4c52) { + break; + } + + const _version = view.getUint16(offset + 4, true); + const _flags = view.getUint16(offset + 6, true); + const epoch = view.getBigUint64(offset + 8, true); + const logIndex = view.getBigUint64(offset + 16, true); + const payloadLength = view.getUint32(offset + 24, true); + const payloadOffset = offset + 32; + const payloadEnd = payloadOffset + payloadLength; + if (payloadEnd > segmentBytes.byteLength) { + break; + } + + const frameBytes = payloadEnd - offset; + const frameOffset = BigInt(offset); + const frameAfterCursor = cursorAfterFrame( + cursor, + epoch, + segmentId, + frameOffset, + logIndex, + ); + + if (frameAfterCursor) { + if ((totalBytes + frameBytes > maxBytes && frames.length > 0) || frames.length >= maxFrames) { + limited = true; + break outer; + } + + const payload = segmentBytes.subarray(payloadOffset, payloadEnd); + const nextOffset = BigInt(payloadEnd); + nextCursor = `${epoch}:${segmentId}:${nextOffset}:${logIndex}`; + + frames.push({ + epoch: epoch.toString(), + logIndex: logIndex.toString(), + segmentId: segmentId.toString(), + segmentOffset: frameOffset.toString(), + payloadBase64: includePayload + ? 
Buffer.from(payload).toString("base64") + : "", + bytes: frameBytes, + }); + totalBytes += frameBytes; + } + + offset = payloadEnd; + } + } + + return { + success: true, + role: resolved.role, + epoch: manifest.epoch, + headLogIndex: manifest.head_log_index, + retainedFloor: manifest.retained_floor, + cursor: typeof queryObject.cursor === "string" ? queryObject.cursor : null, + nextCursor, + eof: !limited, + frameCount: frames.length, + totalBytes, + frames, + }; + } catch (error) { + return { + success: false, + error: + error instanceof Error + ? error.message + : "Failed to fetch replication log", + }; + } + }) + + .get("/replication/transport/snapshot", async ({ query, request, set }) => { + const auth = requireReplicationAdmin(request, set); + if (!auth.ok) { + return { success: false, error: auth.error }; + } + + const raw = getRawDb(); + if (!raw) { + return { success: false, error: "No database connected" }; + } + + try { + const includeData = parseBoolean((query as Record).includeData, false); + const exported = callRawMethod( + raw, + [ + "exportReplicationSnapshotTransportJson", + "export_replication_snapshot_transport_json", + ], + includeData, + ); + const snapshot = JSON.parse(exported) as Record; + return { + success: true, + snapshot, + }; + } catch (error) { + return { + success: false, + error: + error instanceof Error + ? 
error.message + : "Failed to export replication transport snapshot", + }; + } + }) + + .get("/replication/transport/log", async ({ query, request, set }) => { + const auth = requireReplicationAdmin(request, set); + if (!auth.ok) { + return { success: false, error: auth.error }; + } + + const raw = getRawDb(); + if (!raw) { + return { success: false, error: "No database connected" }; + } + + try { + const queryObject = query as Record; + const maxBytes = parsePositiveInt( + queryObject.maxBytes, + REPLICATION_LOG_MAX_BYTES_DEFAULT, + 1, + REPLICATION_LOG_MAX_BYTES_LIMIT, + ); + const maxFrames = parsePositiveInt( + queryObject.maxFrames, + REPLICATION_PULL_MAX_FRAMES_DEFAULT, + 1, + REPLICATION_PULL_MAX_FRAMES_LIMIT, + ); + const includePayload = parseBoolean(queryObject.includePayload, true); + const cursor = typeof queryObject.cursor === "string" ? queryObject.cursor : null; + + const exported = callRawMethod( + raw, + [ + "exportReplicationLogTransportJson", + "export_replication_log_transport_json", + ], + cursor, + maxFrames, + maxBytes, + includePayload, + ); + const payload = JSON.parse(exported) as Record; + return { + success: true, + ...(payload as object), + }; + } catch (error) { + return { + success: false, + error: + error instanceof Error + ? error.message + : "Failed to export replication transport log", + }; + } + }) + + .post( + "/replication/pull", + async ({ body, request, set }) => { + const auth = requireReplicationAdmin(request, set); + if (!auth.ok) { + return { success: false, error: auth.error }; + } + + const raw = getRawDb(); + if (!raw) { + return { success: false, error: "No database connected" }; + } + + const maxFrames = Math.min( + Math.max(body.maxFrames ?? 
REPLICATION_PULL_MAX_FRAMES_DEFAULT, 1), + REPLICATION_PULL_MAX_FRAMES_LIMIT, + ); + + try { + const applied = callRawMethod( + raw, + ["replicaCatchUpOnce", "replica_catch_up_once"], + maxFrames, + ); + const resolved = resolveReplicationStatus(raw); + + return { + success: true, + appliedFrames: applied, + role: resolved.role, + replica: resolved.replica, + }; + } catch (error) { + return { + success: false, + error: + error instanceof Error + ? error.message + : "Replication pull failed", + }; + } + }, + { + body: t.Object({ + maxFrames: t.Optional(t.Number()), + }), + }, + ) + + .post("/replication/reseed", async ({ request, set }) => { + const auth = requireReplicationAdmin(request, set); + if (!auth.ok) { + return { success: false, error: auth.error }; + } + + const raw = getRawDb(); + if (!raw) { + return { success: false, error: "No database connected" }; + } + + try { + callRawMethod( + raw, + ["replicaReseedFromSnapshot", "replica_reseed_from_snapshot"], + ); + const resolved = resolveReplicationStatus(raw); + + return { + success: true, + role: resolved.role, + replica: resolved.replica, + }; + } catch (error) { + return { + success: false, + error: + error instanceof Error + ? error.message + : "Replica reseed failed", + }; + } + }) + + .post("/replication/promote", async ({ request, set }) => { + const auth = requireReplicationAdmin(request, set); + if (!auth.ok) { + return { success: false, error: auth.error }; + } + + const raw = getRawDb(); + if (!raw) { + return { success: false, error: "No database connected" }; + } + + try { + const epoch = callRawMethod( + raw, + ["primaryPromoteToNextEpoch", "primary_promote_to_next_epoch"], + ); + const resolved = resolveReplicationStatus(raw); + + return { + success: true, + epoch, + role: resolved.role, + primary: resolved.primary, + }; + } catch (error) { + return { + success: false, + error: + error instanceof Error + ? 
error.message + : "Primary promote failed", + }; + } + }) + // -------------------------------------------------------------------------- // Database Management // -------------------------------------------------------------------------- .post( "/db/open", async ({ body }) => { - return await openDatabase(body.path); + return await openDatabase(body.path, body.options as PlaygroundOpenOptions | undefined); }, { body: t.Object({ path: t.String(), + options: t.Optional( + t.Object({ + readOnly: t.Optional(t.Boolean()), + createIfMissing: t.Optional(t.Boolean()), + mvcc: t.Optional(t.Boolean()), + mvccGcIntervalMs: t.Optional(t.Number()), + mvccRetentionMs: t.Optional(t.Number()), + mvccMaxChainDepth: t.Optional(t.Number()), + syncMode: t.Optional(t.Union([t.Literal("Full"), t.Literal("Normal"), t.Literal("Off")])), + groupCommitEnabled: t.Optional(t.Boolean()), + groupCommitWindowMs: t.Optional(t.Number()), + walSizeMb: t.Optional(t.Number()), + checkpointThreshold: t.Optional(t.Number()), + replicationRole: t.Optional( + t.Union([ + t.Literal("disabled"), + t.Literal("primary"), + t.Literal("replica"), + ]), + ), + replicationSidecarPath: t.Optional(t.String()), + replicationSourceDbPath: t.Optional(t.String()), + replicationSourceSidecarPath: t.Optional(t.String()), + replicationSegmentMaxBytes: t.Optional(t.Number()), + replicationRetentionMinEntries: t.Optional(t.Number()), + replicationRetentionMinMs: t.Optional(t.Number()), + }), + ), }), } ) diff --git a/playground/src/client/lib/api.ts b/playground/src/client/lib/api.ts index f9e2272..783289c 100644 --- a/playground/src/client/lib/api.ts +++ b/playground/src/client/lib/api.ts @@ -11,10 +11,37 @@ import type { PathResponse, ImpactResponse, ApiResult, + ReplicationStatusResponse, + ReplicationSnapshotResponse, + ReplicationLogResponse, + ReplicationPullResponse, + ReplicationReseedResponse, + ReplicationPromoteResponse, } from "./types.ts"; const API_BASE = "/api"; +export interface DbOpenOptions { + 
readOnly?: boolean + createIfMissing?: boolean + mvcc?: boolean + mvccGcIntervalMs?: number + mvccRetentionMs?: number + mvccMaxChainDepth?: number + syncMode?: "Full" | "Normal" | "Off" + groupCommitEnabled?: boolean + groupCommitWindowMs?: number + walSizeMb?: number + checkpointThreshold?: number + replicationRole?: "disabled" | "primary" | "replica" + replicationSidecarPath?: string + replicationSourceDbPath?: string + replicationSourceSidecarPath?: string + replicationSegmentMaxBytes?: number + replicationRetentionMinEntries?: number + replicationRetentionMinMs?: number +} + // ============================================================================ // Helper // ============================================================================ @@ -35,6 +62,28 @@ async function fetchJson(url: string, options?: RequestInit): Promise { return response.json(); } +async function fetchText(url: string, options?: RequestInit): Promise { + const response = await fetch(`${API_BASE}${url}`, { + ...options, + headers: { + ...options?.headers, + }, + }); + + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${response.statusText}`); + } + + return response.text(); +} + +function withAuthHeader(token?: string): HeadersInit | undefined { + if (!token || token.trim() === "") { + return undefined; + } + return { Authorization: `Bearer ${token}` }; +} + // ============================================================================ // Database Management // ============================================================================ @@ -43,10 +92,13 @@ export async function getStatus(): Promise { return fetchJson("/status"); } -export async function openDatabase(path: string): Promise { +export async function openDatabase(path: string, options?: DbOpenOptions): Promise { return fetchJson("/db/open", { method: "POST", - body: JSON.stringify({ path }), + body: JSON.stringify({ + path, + ...(options ? 
{ options } : {}), + }), }); } @@ -74,6 +126,110 @@ export async function closeDatabase(): Promise { }); } +// ============================================================================ +// Replication +// ============================================================================ + +export interface ReplicationAuthOptions { + adminToken?: string +} + +export interface ReplicationSnapshotOptions extends ReplicationAuthOptions { + includeData?: boolean +} + +export interface ReplicationLogOptions extends ReplicationAuthOptions { + cursor?: string + maxBytes?: number + maxFrames?: number + includePayload?: boolean +} + +export interface ReplicationPullOptions extends ReplicationAuthOptions { + maxFrames?: number +} + +export async function getReplicationStatus(): Promise { + return fetchJson("/replication/status"); +} + +export async function getReplicationMetricsPrometheus( + options?: ReplicationAuthOptions, +): Promise { + return fetchText("/replication/metrics", { + headers: withAuthHeader(options?.adminToken), + }); +} + +export async function getReplicationSnapshotLatest( + options?: ReplicationSnapshotOptions, +): Promise { + const params = new URLSearchParams(); + if (typeof options?.includeData === "boolean") { + params.set("includeData", options.includeData ? "true" : "false"); + } + const query = params.size > 0 ? 
`?${params.toString()}` : ""; + + return fetchJson(`/replication/snapshot/latest${query}`, { + headers: withAuthHeader(options?.adminToken), + }); +} + +export async function getReplicationLog( + options?: ReplicationLogOptions, +): Promise { + const params = new URLSearchParams(); + if (options?.cursor) { + params.set("cursor", options.cursor); + } + if (typeof options?.maxBytes === "number") { + params.set("maxBytes", String(options.maxBytes)); + } + if (typeof options?.maxFrames === "number") { + params.set("maxFrames", String(options.maxFrames)); + } + if (typeof options?.includePayload === "boolean") { + params.set("includePayload", options.includePayload ? "true" : "false"); + } + const query = params.size > 0 ? `?${params.toString()}` : ""; + + return fetchJson(`/replication/log${query}`, { + headers: withAuthHeader(options?.adminToken), + }); +} + +export async function pullReplicaOnce( + options?: ReplicationPullOptions, +): Promise { + return fetchJson("/replication/pull", { + method: "POST", + headers: withAuthHeader(options?.adminToken), + body: JSON.stringify( + typeof options?.maxFrames === "number" + ? 
{ maxFrames: options.maxFrames } + : {}, + ), + }); +} + +export async function reseedReplica( + options?: ReplicationAuthOptions, +): Promise { + return fetchJson("/replication/reseed", { + method: "POST", + headers: withAuthHeader(options?.adminToken), + }); +} + +export async function promotePrimary( + options?: ReplicationAuthOptions, +): Promise { + return fetchJson("/replication/promote", { + method: "POST", + headers: withAuthHeader(options?.adminToken), + }); +} + // ============================================================================ // Stats // ============================================================================ diff --git a/playground/src/client/lib/types.ts b/playground/src/client/lib/types.ts index 97acbdf..26cb2d0 100644 --- a/playground/src/client/lib/types.ts +++ b/playground/src/client/lib/types.ts @@ -65,6 +65,98 @@ export interface ApiResult { error?: string; } +export interface ReplicationReplicaLag { + replicaId: string; + epoch: number; + appliedLogIndex: number; +} + +export interface PrimaryReplicationStatus { + role?: string; + epoch?: number; + headLogIndex?: number; + retainedFloor?: number; + replicaLags?: ReplicationReplicaLag[]; + sidecarPath?: string; + lastToken?: string | null; + appendAttempts?: number; + appendFailures?: number; + appendSuccesses?: number; +} + +export interface ReplicaReplicationStatus { + role?: string; + appliedEpoch?: number; + appliedLogIndex?: number; + needsReseed?: boolean; + lastError?: string | null; +} + +export interface ReplicationStatusResponse { + connected: boolean; + authEnabled?: boolean; + role?: "primary" | "replica" | "disabled"; + primary?: PrimaryReplicationStatus | null; + replica?: ReplicaReplicationStatus | null; + error?: string; +} + +export interface ReplicationSnapshotResponse extends ApiResult { + role?: "primary" | "replica" | "disabled"; + epoch?: number | null; + headLogIndex?: number | null; + snapshot?: { + format: string; + dbPath: string; + byteLength: number; 
+ sha256: string; + generatedAt: string; + dataBase64?: string; + }; +} + +export interface ReplicationLogFrame { + epoch: string; + logIndex: string; + segmentId: string; + segmentOffset: string; + payloadBase64: string; + bytes: number; +} + +export interface ReplicationLogResponse extends ApiResult { + role?: "primary" | "replica" | "disabled"; + epoch?: number | null; + headLogIndex?: number | null; + retainedFloor?: number | null; + request?: { + maxBytes: number; + maxFrames: number; + includePayload: boolean; + cursor: string | null; + }; + frames?: ReplicationLogFrame[]; + nextCursor?: string | null; + eof?: boolean; +} + +export interface ReplicationPullResponse extends ApiResult { + role?: "primary" | "replica" | "disabled"; + appliedFrames?: number; + replica?: ReplicaReplicationStatus | null; +} + +export interface ReplicationReseedResponse extends ApiResult { + role?: "primary" | "replica" | "disabled"; + replica?: ReplicaReplicationStatus | null; +} + +export interface ReplicationPromoteResponse extends ApiResult { + role?: "primary" | "replica" | "disabled"; + epoch?: number | null; + primary?: PrimaryReplicationStatus | null; +} + // ============================================================================ // UI State Types // ============================================================================ diff --git a/playground/src/server.ts b/playground/src/server.ts index dc5cdf6..d8543db 100644 --- a/playground/src/server.ts +++ b/playground/src/server.ts @@ -7,6 +7,7 @@ import { Elysia } from "elysia"; import { cors } from "@elysiajs/cors"; import { apiRoutes } from "./api/routes.ts"; +import { existsSync } from "node:fs"; import { join } from "node:path"; const PORT = process.env.PORT ? 
parseInt(process.env.PORT) : 3000; @@ -24,6 +25,83 @@ const getContentType = (path: string): string => { return "application/octet-stream"; }; +type TlsFile = ReturnType; + +interface PlaygroundTlsConfig { + enabled: boolean; + protocol: "http" | "https"; + tls?: { + cert: TlsFile; + key: TlsFile; + ca?: TlsFile; + requestCert: boolean; + rejectUnauthorized: boolean; + }; +} + +function parseBooleanEnv(name: string, raw: string | undefined, defaultValue: boolean): boolean { + if (raw === undefined) { + return defaultValue; + } + + const normalized = raw.trim().toLowerCase(); + if (normalized === "") { + return defaultValue; + } + if (normalized === "1" || normalized === "true" || normalized === "yes" || normalized === "on") { + return true; + } + if (normalized === "0" || normalized === "false" || normalized === "no" || normalized === "off") { + return false; + } + throw new Error(`Invalid ${name} (expected boolean)`); +} + +export function resolvePlaygroundTlsConfig(env: NodeJS.ProcessEnv = process.env): PlaygroundTlsConfig { + const certFile = env.PLAYGROUND_TLS_CERT_FILE?.trim(); + const keyFile = env.PLAYGROUND_TLS_KEY_FILE?.trim(); + const caFile = env.PLAYGROUND_TLS_CA_FILE?.trim(); + + const hasCert = Boolean(certFile && certFile.length > 0); + const hasKey = Boolean(keyFile && keyFile.length > 0); + if (hasCert !== hasKey) { + throw new Error("PLAYGROUND_TLS_CERT_FILE and PLAYGROUND_TLS_KEY_FILE must both be set for TLS"); + } + + if (!hasCert || !hasKey) { + return { enabled: false, protocol: "http" }; + } + + if (!existsSync(certFile!)) { + throw new Error(`PLAYGROUND_TLS_CERT_FILE does not exist: ${certFile}`); + } + if (!existsSync(keyFile!)) { + throw new Error(`PLAYGROUND_TLS_KEY_FILE does not exist: ${keyFile}`); + } + if (caFile && caFile.length > 0 && !existsSync(caFile)) { + throw new Error(`PLAYGROUND_TLS_CA_FILE does not exist: ${caFile}`); + } + + const requestCert = parseBooleanEnv("PLAYGROUND_TLS_REQUEST_CERT", 
env.PLAYGROUND_TLS_REQUEST_CERT, false); + const rejectUnauthorized = parseBooleanEnv( + "PLAYGROUND_TLS_REJECT_UNAUTHORIZED", + env.PLAYGROUND_TLS_REJECT_UNAUTHORIZED, + true, + ); + + return { + enabled: true, + protocol: "https", + tls: { + cert: Bun.file(certFile!), + key: Bun.file(keyFile!), + ...(caFile && caFile.length > 0 ? { ca: Bun.file(caFile) } : {}), + requestCert, + rejectUnauthorized, + }, + }; +} + export const app = new Elysia() // Enable CORS for development .use(cors({ @@ -59,12 +137,19 @@ let server: ReturnType | null = null; if (import.meta.main) { try { + const tlsConfig = resolvePlaygroundTlsConfig(); server = app.listen({ port: PORT, hostname: "0.0.0.0", + ...(tlsConfig.tls ? { tls: tlsConfig.tls } : {}), }); const actualPort = server.server?.port ?? PORT; - console.log(`RayDB Playground running at http://localhost:${actualPort}`); + console.log(`RayDB Playground running at ${tlsConfig.protocol}://localhost:${actualPort}`); + if (tlsConfig.enabled) { + console.log( + `TLS enabled (requestCert=${tlsConfig.tls?.requestCert ? "true" : "false"}, rejectUnauthorized=${tlsConfig.tls?.rejectUnauthorized ? 
"true" : "false"})`, + ); + } } catch (err) { console.error("Failed to start server", err); process.exit(1); diff --git a/promo/.gitignore b/promo/.gitignore new file mode 100644 index 0000000..f7f4c24 --- /dev/null +++ b/promo/.gitignore @@ -0,0 +1,4 @@ +node_modules/ +out/ +dist/ +.remotion/ diff --git a/promo/package-lock.json b/promo/package-lock.json new file mode 100644 index 0000000..67a5cff --- /dev/null +++ b/promo/package-lock.json @@ -0,0 +1,2714 @@ +{ + "name": "kitedb-promo", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "kitedb-promo", + "version": "1.0.0", + "dependencies": { + "@remotion/bundler": "^4.0.420", + "@remotion/cli": "^4.0.420", + "react": "^19.2.4", + "react-dom": "^19.2.4", + "remotion": "^4.0.420" + }, + "devDependencies": { + "@types/react": "^19.2.13", + "@types/react-dom": "^19.2.3", + "typescript": "^5.9.3" + } + }, + "node_modules/@babel/parser": { + "version": "7.24.1", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.24.1.tgz", + "integrity": "sha512-Zo9c7N3xdOIQrNip7Lc9wvRPzlRtovHVE4lkz8WEDr7uYh/GMQhSiIgFxGIArRHYdJE5kxtZjAf8rT0xhdLCzg==", + "license": "MIT", + "bin": { + "parser": "bin/babel-parser.js" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.0.tgz", + "integrity": "sha512-O7vun9Sf8DFjH2UtqK8Ku3LkquL9SZL8OLY1T5NZkA34+wG3OQF7cl4Ql8vdNzM6fzBbYfLaiRLIOZ+2FOCgBQ==", + "cpu": [ + "ppc64" + ], + "license": "MIT", + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.25.0.tgz", + "integrity": "sha512-PTyWCYYiU0+1eJKmw21lWtC+d08JDZPQ5g+kFyxP0V+es6VPPSUhM6zk8iImp2jbV6GwjX4pap0JFbUQN65X1g==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": 
true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.25.0.tgz", + "integrity": "sha512-grvv8WncGjDSyUBjN9yHXNt+cq0snxXbDxy5pJtzMKGmmpPxeAmAhWxXI+01lU5rwZomDgD3kJwulEnhTRUd6g==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.25.0.tgz", + "integrity": "sha512-m/ix7SfKG5buCnxasr52+LI78SQ+wgdENi9CqyCXwjVR2X4Jkz+BpC3le3AoBPYTC9NHklwngVXvbJ9/Akhrfg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.25.0.tgz", + "integrity": "sha512-mVwdUb5SRkPayVadIOI78K7aAnPamoeFR2bT5nszFUZ9P8UpK4ratOdYbZZXYSqPKMHfS1wdHCJk1P1EZpRdvw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.25.0.tgz", + "integrity": "sha512-DgDaYsPWFTS4S3nWpFcMn/33ZZwAAeAFKNHNa1QN0rI4pUjgqf0f7ONmXf6d22tqTY+H9FNdgeaAa+YIFUn2Rg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.0.tgz", + "integrity": "sha512-VN4ocxy6dxefN1MepBx/iD1dH5K8qNtNe227I0mnTRjry8tj5MRk4zprLEdG8WPyAPb93/e4pSgi1SoHdgOa4w==", + "cpu": [ + "arm64" + ], + "license": 
"MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.25.0.tgz", + "integrity": "sha512-mrSgt7lCh07FY+hDD1TxiTyIHyttn6vnjesnPoVDNmDfOmggTLXRv8Id5fNZey1gl/V2dyVK1VXXqVsQIiAk+A==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.25.0.tgz", + "integrity": "sha512-vkB3IYj2IDo3g9xX7HqhPYxVkNQe8qTK55fraQyTzTX/fxaDtXiEnavv9geOsonh2Fd2RMB+i5cbhu2zMNWJwg==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.25.0.tgz", + "integrity": "sha512-9QAQjTWNDM/Vk2bgBl17yWuZxZNQIF0OUUuPZRKoDtqF2k4EtYbpyiG5/Dk7nqeK6kIJWPYldkOcBqjXjrUlmg==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.25.0.tgz", + "integrity": "sha512-43ET5bHbphBegyeqLb7I1eYn2P/JYGNmzzdidq/w0T8E2SsYL1U6un2NFROFRg1JZLTzdCoRomg8Rvf9M6W6Gg==", + "cpu": [ + "ia32" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.25.0.tgz", + "integrity": "sha512-fC95c/xyNFueMhClxJmeRIj2yrSMdDfmqJnyOY4ZqsALkDrrKJfIg5NTMSzVBr5YW1jf+l7/cndBfP3MSDpoHw==", + "cpu": [ + "loong64" + ], + "license": 
"MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.25.0.tgz", + "integrity": "sha512-nkAMFju7KDW73T1DdH7glcyIptm95a7Le8irTQNO/qtkoyypZAnjchQgooFUDQhNAy4iu08N79W4T4pMBwhPwQ==", + "cpu": [ + "mips64el" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.25.0.tgz", + "integrity": "sha512-NhyOejdhRGS8Iwv+KKR2zTq2PpysF9XqY+Zk77vQHqNbo/PwZCzB5/h7VGuREZm1fixhs4Q/qWRSi5zmAiO4Fw==", + "cpu": [ + "ppc64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.25.0.tgz", + "integrity": "sha512-5S/rbP5OY+GHLC5qXp1y/Mx//e92L1YDqkiBbO9TQOvuFXM+iDqUNG5XopAnXoRH3FjIUDkeGcY1cgNvnXp/kA==", + "cpu": [ + "riscv64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.25.0.tgz", + "integrity": "sha512-XM2BFsEBz0Fw37V0zU4CXfcfuACMrppsMFKdYY2WuTS3yi8O1nFOhil/xhKTmE1nPmVyvQJjJivgDT+xh8pXJA==", + "cpu": [ + "s390x" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.25.0.tgz", + "integrity": "sha512-9yl91rHw/cpwMCNytUDxwj2XjFpxML0y9HAOH9pNVQDpQrBxHy01Dx+vaMu0N1CKa/RzBD2hB4u//nfc+Sd3Cw==", + "cpu": [ + "x64" + ], 
+ "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-arm64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.0.tgz", + "integrity": "sha512-RuG4PSMPFfrkH6UwCAqBzauBWTygTvb1nxWasEJooGSJ/NwRw7b2HOwyRTQIU97Hq37l3npXoZGYMy3b3xYvPw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.25.0.tgz", + "integrity": "sha512-jl+qisSB5jk01N5f7sPCsBENCOlPiS/xptD5yxOx2oqQfyourJwIKLRA2yqWdifj3owQZCL2sn6o08dBzZGQzA==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-arm64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.0.tgz", + "integrity": "sha512-21sUNbq2r84YE+SJDfaQRvdgznTD8Xc0oc3p3iW/a1EVWeNj/SdUCbm5U0itZPQYRuRTW20fPMWMpcrciH2EJw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.25.0.tgz", + "integrity": "sha512-2gwwriSMPcCFRlPlKx3zLQhfN/2WjJ2NSlg5TKLQOJdV0mSxIcYNTMhk3H3ulL/cak+Xj0lY1Ym9ysDV1igceg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.25.0.tgz", + "integrity": "sha512-bxI7ThgLzPrPz484/S9jLlvUAHYMzy6I0XiU1ZMeAEOBcS0VePBFxh1JjTQt3Xiat5b6Oh4x7UC7IwKQKIJRIg==", + "cpu": [ + "x64" + ], 
+ "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.25.0.tgz", + "integrity": "sha512-ZUAc2YK6JW89xTbXvftxdnYy3m4iHIkDtK3CLce8wg8M2L+YZhIvO1DKpxrd0Yr59AeNNkTiic9YLf6FTtXWMw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.25.0.tgz", + "integrity": "sha512-eSNxISBu8XweVEWG31/JzjkIGbGIJN/TrRoiSVZwZ6pkC6VX4Im/WV2cz559/TXLcYbcrDN8JtKgd9DJVIo8GA==", + "cpu": [ + "ia32" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.25.0.tgz", + "integrity": "sha512-ZENoHJBxA20C2zFzh6AI4fT6RraMzjYw4xKWemRTRmRVtN9c5DcH9r/f2ihEkMjOW5eGgrwCslG/+Y/3bL+DHQ==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@jridgewell/gen-mapping": { + "version": "0.3.13", + "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz", + "integrity": "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==", + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.0", + "@jridgewell/trace-mapping": "^0.3.24" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "license": "MIT", + 
"engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/source-map": { + "version": "0.3.11", + "resolved": "https://registry.npmjs.org/@jridgewell/source-map/-/source-map-0.3.11.tgz", + "integrity": "sha512-ZMp1V8ZFcPG5dIWnQLr3NSI1MiCU7UETdS/A0G8V/XWHvJv3ZsFqutJn1Y5RPmAPX6F3BiE397OqveU/9NCuIA==", + "license": "MIT", + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.25" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", + "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", + "license": "MIT" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.31", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.31.tgz", + "integrity": "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==", + "license": "MIT", + "dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, + "node_modules/@remotion/bundler": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/bundler/-/bundler-4.0.420.tgz", + "integrity": "sha512-CRpbio8gyPTO6Q4wykvQyEVjX4o3BRnH5GBIw4vSRPjJ0K6N+jUCxs6FRQRlmQisC+KobQ+JK2O5bKQmZxNynQ==", + "license": "SEE LICENSE IN LICENSE.md", + "dependencies": { + "@remotion/media-parser": "4.0.420", + "@remotion/studio": "4.0.420", + "@remotion/studio-shared": "4.0.420", + "css-loader": "5.2.7", + "esbuild": "0.25.0", + "react-refresh": "0.9.0", + "remotion": "4.0.420", + "source-map": "0.7.3", + "style-loader": "4.0.0", + "webpack": "5.96.1" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@remotion/cli": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/cli/-/cli-4.0.420.tgz", + 
"integrity": "sha512-XnrM5ko0RQvdYv468Pf90mKAezdM23SVRPy3o/2CFvvG9FxnchZqb58OzWWWeRCFtZtpOt1M2oq2mVPGcq8o2Q==", + "license": "SEE LICENSE IN LICENSE.md", + "dependencies": { + "@remotion/bundler": "4.0.420", + "@remotion/media-utils": "4.0.420", + "@remotion/player": "4.0.420", + "@remotion/renderer": "4.0.420", + "@remotion/studio": "4.0.420", + "@remotion/studio-server": "4.0.420", + "@remotion/studio-shared": "4.0.420", + "dotenv": "9.0.2", + "minimist": "1.2.6", + "prompts": "2.4.2", + "remotion": "4.0.420" + }, + "bin": { + "remotion": "remotion-cli.js", + "remotionb": "remotionb-cli.js", + "remotiond": "remotiond-cli.js" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@remotion/compositor-darwin-arm64": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/compositor-darwin-arm64/-/compositor-darwin-arm64-4.0.420.tgz", + "integrity": "sha512-LnJpAptwZkDQ1Dig3/Kdn4ga73MEmgk7LbLrReV8mypUZh5EGovnhp0pNsNwaeUL5nARU8Ny5Ok6/jqCzR1lmQ==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@remotion/compositor-darwin-x64": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/compositor-darwin-x64/-/compositor-darwin-x64-4.0.420.tgz", + "integrity": "sha512-xPEZlbQvOslzvSEbGDgpnxAMGCxZc5vABWSfllqUw5F1xmbAKiNPYfSImKXIu7ya0aigyuq5si+b+9t0Hz7Kfg==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@remotion/compositor-linux-arm64-gnu": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/compositor-linux-arm64-gnu/-/compositor-linux-arm64-gnu-4.0.420.tgz", + "integrity": "sha512-CCi6qaScg6H4Z1QkT6YeXiGMOxC1JVV34TNjES43QfLM9EvdEpTOBd8oOx1gg3jg7gJ6TwBoMbkpcPUonJRrYg==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@remotion/compositor-linux-arm64-musl": { + "version": "4.0.420", + "resolved": 
"https://registry.npmjs.org/@remotion/compositor-linux-arm64-musl/-/compositor-linux-arm64-musl-4.0.420.tgz", + "integrity": "sha512-pOKzZXr/hqyholHxhZuYuc3h0KOk7Al7+CYb0u/yag2E/rJUsgqi50zX/WuyyF06Pa+rhmmwS89Xc4DdrZB7FQ==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@remotion/compositor-linux-x64-gnu": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/compositor-linux-x64-gnu/-/compositor-linux-x64-gnu-4.0.420.tgz", + "integrity": "sha512-n441g77KwcYuHUPQx1ClsWu2S60Qlpw131NPy2PxJIc+DIAmM+K1rTq7Ycg+Ad9JQ4qrhAA7DUPL9lLG1ML4PA==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@remotion/compositor-linux-x64-musl": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/compositor-linux-x64-musl/-/compositor-linux-x64-musl-4.0.420.tgz", + "integrity": "sha512-BCsqBPtNM0w4efJzfzbuPdqNdVjC6rjCpwmQVNdVSh1AlUqFMROpiXO1mevMx36BvWRWIv/wjDLrB7nXyT2SUQ==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@remotion/compositor-win32-x64-msvc": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/compositor-win32-x64-msvc/-/compositor-win32-x64-msvc-4.0.420.tgz", + "integrity": "sha512-XQyVLSPjc4RcsN8MHPHfFNdaHqkLKTt4Ww2P/yNvE9RDdERNvG9hLvoCJJSEKQzbS2f6i+n6RDO9cN+qc7lFYQ==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@remotion/licensing": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/licensing/-/licensing-4.0.420.tgz", + "integrity": "sha512-JJs0KOa8fCXvJXUya/m5n3FwxckqYQeo/dUFYm/cFIXgvvB/vPeB25jgWw9OhevoHmyvmkzgg8wx0yVrRbWD1A==", + "license": "MIT" + }, + "node_modules/@remotion/media-parser": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/media-parser/-/media-parser-4.0.420.tgz", + "integrity": 
"sha512-py5CFaSbApIfh+aWeFMoAHqWd0Vc0U5YvlB2c7qD+bWPNEQ3j+CGbBn84CbWDEF8M7Dg5TiKIdrVoxyK3vNNfw==", + "license": "Remotion License https://remotion.dev/license" + }, + "node_modules/@remotion/media-utils": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/media-utils/-/media-utils-4.0.420.tgz", + "integrity": "sha512-eg0iBiTDI5RYd5npzi9mbUMUfTnP0hIfewsR5BfqyjorUimdbySzOVMprkR10TQgXX7kBmaN6HfQnJ0kzDBymg==", + "license": "MIT", + "dependencies": { + "@remotion/media-parser": "4.0.420", + "@remotion/webcodecs": "4.0.420", + "mediabunny": "1.29.0", + "remotion": "4.0.420" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@remotion/player": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/player/-/player-4.0.420.tgz", + "integrity": "sha512-Nx5sIvb7K4SWGlbzVFv9qC9vKdTgIhttA84SoBFuWSdLT27jT4ap16Kj9snrEDHtPgZ0YH0w8fetd5cSmo/3yw==", + "license": "SEE LICENSE IN LICENSE.md", + "dependencies": { + "remotion": "4.0.420" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@remotion/renderer": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/renderer/-/renderer-4.0.420.tgz", + "integrity": "sha512-wATwp/R1cxmeD+hWoKPZip2aOOc1pPuJCQrugS3S0pRiB3Od3867QKMWOZyde4Yn5eX+/RFbRAQZqaqkctm7lw==", + "license": "SEE LICENSE IN LICENSE.md", + "dependencies": { + "@remotion/licensing": "4.0.420", + "@remotion/streaming": "4.0.420", + "execa": "5.1.1", + "extract-zip": "2.0.1", + "remotion": "4.0.420", + "source-map": "^0.8.0-beta.0", + "ws": "8.17.1" + }, + "optionalDependencies": { + "@remotion/compositor-darwin-arm64": "4.0.420", + "@remotion/compositor-darwin-x64": "4.0.420", + "@remotion/compositor-linux-arm64-gnu": "4.0.420", + "@remotion/compositor-linux-arm64-musl": "4.0.420", + "@remotion/compositor-linux-x64-gnu": "4.0.420", + "@remotion/compositor-linux-x64-musl": "4.0.420", + 
"@remotion/compositor-win32-x64-msvc": "4.0.420" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@remotion/renderer/node_modules/source-map": { + "version": "0.8.0-beta.0", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.8.0-beta.0.tgz", + "integrity": "sha512-2ymg6oRBpebeZi9UUNsgQ89bhx01TcTkmNTGnNO88imTmbSgy4nfujrgVEFKWpMTEGA11EDkTt7mqObTPdigIA==", + "deprecated": "The work that was done in this beta branch won't be included in future versions", + "license": "BSD-3-Clause", + "dependencies": { + "whatwg-url": "^7.0.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@remotion/streaming": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/streaming/-/streaming-4.0.420.tgz", + "integrity": "sha512-Axlvz82vuW+O1z50n7zxUI1aJhIb4CVujHyOby+C1FTGzdiW7Zw0YN/3nQNitufhRBy+vceRGQg9FiucCQ5Ydg==", + "license": "MIT" + }, + "node_modules/@remotion/studio": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/studio/-/studio-4.0.420.tgz", + "integrity": "sha512-CHjPBWQYL7i9ULA0n3CgFdqcapXA6pvM6HyKqKIthCi1TCzmHzey3Sy3e/qTCc/V8gztB5y60svCWgmDXAHMmg==", + "license": "MIT", + "dependencies": { + "@remotion/media-utils": "4.0.420", + "@remotion/player": "4.0.420", + "@remotion/renderer": "4.0.420", + "@remotion/studio-shared": "4.0.420", + "@remotion/web-renderer": "4.0.420", + "@remotion/zod-types": "4.0.420", + "mediabunny": "1.29.0", + "memfs": "3.4.3", + "open": "^8.4.2", + "remotion": "4.0.420", + "semver": "7.5.3", + "source-map": "0.7.3", + "zod": "3.22.3" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@remotion/studio-server": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/studio-server/-/studio-server-4.0.420.tgz", + "integrity": "sha512-XYz/4pR4vpYGyw0NXsI+NYoQoeoa0AJNnamCTExaZBtkuzbjHYrmJbR1U+ofbKMKnIXI0fsROUGQ6wWE39w3CA==", + "license": "MIT", + 
"dependencies": { + "@babel/parser": "7.24.1", + "@remotion/bundler": "4.0.420", + "@remotion/renderer": "4.0.420", + "@remotion/studio-shared": "4.0.420", + "memfs": "3.4.3", + "open": "^8.4.2", + "recast": "0.23.11", + "remotion": "4.0.420", + "semver": "7.5.3", + "source-map": "0.7.3" + } + }, + "node_modules/@remotion/studio-shared": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/studio-shared/-/studio-shared-4.0.420.tgz", + "integrity": "sha512-FjCWOv/LF0m/xeSIzK26rlxiCZubVin1nP/pOG7c4Uv5BzTAUDJ1Fn9zhTNx9SPvRvK4HtLiHtKhELtQFlTwIw==", + "license": "MIT", + "dependencies": { + "remotion": "4.0.420" + } + }, + "node_modules/@remotion/web-renderer": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/web-renderer/-/web-renderer-4.0.420.tgz", + "integrity": "sha512-lXdT4jQA0nvEf3/Vvkp1q6QSld49RDla6Ddgr1KSArFmZkPzQPQRYdFcH3V8xm7qeYtT9vfK72k9xgvyd0YtvA==", + "license": "UNLICENSED", + "dependencies": { + "@remotion/licensing": "4.0.420", + "mediabunny": "1.29.0", + "remotion": "4.0.420" + }, + "peerDependencies": { + "react": ">=18.0.0", + "react-dom": ">=18.0.0" + } + }, + "node_modules/@remotion/webcodecs": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/webcodecs/-/webcodecs-4.0.420.tgz", + "integrity": "sha512-MxLXck92+h/KtVptM/JnOVKLoLrv+zO/tKTpcCloaS+qykjoTcEELlgsWv9PNarEgZUVpgU74/MdZ0DsqpJwhA==", + "license": "Remotion License (See https://remotion.dev/docs/webcodecs#license)", + "dependencies": { + "@remotion/media-parser": "4.0.420" + } + }, + "node_modules/@remotion/zod-types": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/@remotion/zod-types/-/zod-types-4.0.420.tgz", + "integrity": "sha512-sq4gXKd2tqTpPniVrAjCD3q+ZKKdiOnqYN3eGvfjRIDh25i0wKLz1BkSLEiCZ362gX+6gCd9n+Vo8Bfu8KvXWA==", + "license": "MIT", + "dependencies": { + "remotion": "4.0.420" + }, + "peerDependencies": { + "zod": "3.22.3" + } + }, + "node_modules/@types/dom-mediacapture-transform": { 
+ "version": "0.1.11", + "resolved": "https://registry.npmjs.org/@types/dom-mediacapture-transform/-/dom-mediacapture-transform-0.1.11.tgz", + "integrity": "sha512-Y2p+nGf1bF2XMttBnsVPHUWzRRZzqUoJAKmiP10b5umnO6DDrWI0BrGDJy1pOHoOULVmGSfFNkQrAlC5dcj6nQ==", + "license": "MIT", + "dependencies": { + "@types/dom-webcodecs": "*" + } + }, + "node_modules/@types/dom-webcodecs": { + "version": "0.1.13", + "resolved": "https://registry.npmjs.org/@types/dom-webcodecs/-/dom-webcodecs-0.1.13.tgz", + "integrity": "sha512-O5hkiFIcjjszPIYyUSyvScyvrBoV3NOEEZx/pMlsu44TKzWNkLVBBxnxJz42in5n3QIolYOcBYFCPZZ0h8SkwQ==", + "license": "MIT" + }, + "node_modules/@types/eslint": { + "version": "9.6.1", + "resolved": "https://registry.npmjs.org/@types/eslint/-/eslint-9.6.1.tgz", + "integrity": "sha512-FXx2pKgId/WyYo2jXw63kk7/+TY7u7AziEJxJAnSFzHlqTAS3Ync6SvgYAN/k4/PQpnnVuzoMuVnByKK2qp0ag==", + "license": "MIT", + "dependencies": { + "@types/estree": "*", + "@types/json-schema": "*" + } + }, + "node_modules/@types/eslint-scope": { + "version": "3.7.7", + "resolved": "https://registry.npmjs.org/@types/eslint-scope/-/eslint-scope-3.7.7.tgz", + "integrity": "sha512-MzMFlSLBqNF2gcHWO0G1vP/YQyfvrxZ0bF+u7mzUdZ1/xK4A4sru+nraZz5i3iEIk1l1uyicaDVTB4QbbEkAYg==", + "license": "MIT", + "dependencies": { + "@types/eslint": "*", + "@types/estree": "*" + } + }, + "node_modules/@types/estree": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", + "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==", + "license": "MIT" + }, + "node_modules/@types/json-schema": { + "version": "7.0.15", + "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", + "integrity": "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==", + "license": "MIT" + }, + "node_modules/@types/node": { + "version": "25.2.2", + "resolved": 
"https://registry.npmjs.org/@types/node/-/node-25.2.2.tgz", + "integrity": "sha512-BkmoP5/FhRYek5izySdkOneRyXYN35I860MFAGupTdebyE66uZaR+bXLHq8k4DirE5DwQi3NuhvRU1jqTVwUrQ==", + "license": "MIT", + "dependencies": { + "undici-types": "~7.16.0" + } + }, + "node_modules/@types/react": { + "version": "19.2.13", + "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.13.tgz", + "integrity": "sha512-KkiJeU6VbYbUOp5ITMIc7kBfqlYkKA5KhEHVrGMmUUMt7NeaZg65ojdPk+FtNrBAOXNVM5QM72jnADjM+XVRAQ==", + "dev": true, + "license": "MIT", + "peer": true, + "dependencies": { + "csstype": "^3.2.2" + } + }, + "node_modules/@types/react-dom": { + "version": "19.2.3", + "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-19.2.3.tgz", + "integrity": "sha512-jp2L/eY6fn+KgVVQAOqYItbF0VY/YApe5Mz2F0aykSO8gx31bYCZyvSeYxCHKvzHG5eZjc+zyaS5BrBWya2+kQ==", + "dev": true, + "license": "MIT", + "peerDependencies": { + "@types/react": "^19.2.0" + } + }, + "node_modules/@types/yauzl": { + "version": "2.10.3", + "resolved": "https://registry.npmjs.org/@types/yauzl/-/yauzl-2.10.3.tgz", + "integrity": "sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==", + "license": "MIT", + "optional": true, + "dependencies": { + "@types/node": "*" + } + }, + "node_modules/@webassemblyjs/ast": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/ast/-/ast-1.14.1.tgz", + "integrity": "sha512-nuBEDgQfm1ccRp/8bCQrx1frohyufl4JlbMMZ4P1wpeOfDhF6FQkxZJ1b/e+PLwr6X1Nhw6OLme5usuBWYBvuQ==", + "license": "MIT", + "dependencies": { + "@webassemblyjs/helper-numbers": "1.13.2", + "@webassemblyjs/helper-wasm-bytecode": "1.13.2" + } + }, + "node_modules/@webassemblyjs/floating-point-hex-parser": { + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.13.2.tgz", + "integrity": 
"sha512-6oXyTOzbKxGH4steLbLNOu71Oj+C8Lg34n6CqRvqfS2O71BxY6ByfMDRhBytzknj9yGUPVJ1qIKhRlAwO1AovA==", + "license": "MIT" + }, + "node_modules/@webassemblyjs/helper-api-error": { + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-api-error/-/helper-api-error-1.13.2.tgz", + "integrity": "sha512-U56GMYxy4ZQCbDZd6JuvvNV/WFildOjsaWD3Tzzvmw/mas3cXzRJPMjP83JqEsgSbyrmaGjBfDtV7KDXV9UzFQ==", + "license": "MIT" + }, + "node_modules/@webassemblyjs/helper-buffer": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-buffer/-/helper-buffer-1.14.1.tgz", + "integrity": "sha512-jyH7wtcHiKssDtFPRB+iQdxlDf96m0E39yb0k5uJVhFGleZFoNw1c4aeIcVUPPbXUVJ94wwnMOAqUHyzoEPVMA==", + "license": "MIT" + }, + "node_modules/@webassemblyjs/helper-numbers": { + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-numbers/-/helper-numbers-1.13.2.tgz", + "integrity": "sha512-FE8aCmS5Q6eQYcV3gI35O4J789wlQA+7JrqTTpJqn5emA4U2hvwJmvFRC0HODS+3Ye6WioDklgd6scJ3+PLnEA==", + "license": "MIT", + "dependencies": { + "@webassemblyjs/floating-point-hex-parser": "1.13.2", + "@webassemblyjs/helper-api-error": "1.13.2", + "@xtuc/long": "4.2.2" + } + }, + "node_modules/@webassemblyjs/helper-wasm-bytecode": { + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.13.2.tgz", + "integrity": "sha512-3QbLKy93F0EAIXLh0ogEVR6rOubA9AoZ+WRYhNbFyuB70j3dRdwH9g+qXhLAO0kiYGlg3TxDV+I4rQTr/YNXkA==", + "license": "MIT" + }, + "node_modules/@webassemblyjs/helper-wasm-section": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.14.1.tgz", + "integrity": "sha512-ds5mXEqTJ6oxRoqjhWDU83OgzAYjwsCV8Lo/N+oRsNDmx/ZDpqalmrtgOMkHwxsG0iI//3BwWAErYRHtgn0dZw==", + "license": "MIT", + "dependencies": { + "@webassemblyjs/ast": "1.14.1", + "@webassemblyjs/helper-buffer": "1.14.1", + 
"@webassemblyjs/helper-wasm-bytecode": "1.13.2", + "@webassemblyjs/wasm-gen": "1.14.1" + } + }, + "node_modules/@webassemblyjs/ieee754": { + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@webassemblyjs/ieee754/-/ieee754-1.13.2.tgz", + "integrity": "sha512-4LtOzh58S/5lX4ITKxnAK2USuNEvpdVV9AlgGQb8rJDHaLeHciwG4zlGr0j/SNWlr7x3vO1lDEsuePvtcDNCkw==", + "license": "MIT", + "dependencies": { + "@xtuc/ieee754": "^1.2.0" + } + }, + "node_modules/@webassemblyjs/leb128": { + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@webassemblyjs/leb128/-/leb128-1.13.2.tgz", + "integrity": "sha512-Lde1oNoIdzVzdkNEAWZ1dZ5orIbff80YPdHx20mrHwHrVNNTjNr8E3xz9BdpcGqRQbAEa+fkrCb+fRFTl/6sQw==", + "license": "Apache-2.0", + "dependencies": { + "@xtuc/long": "4.2.2" + } + }, + "node_modules/@webassemblyjs/utf8": { + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@webassemblyjs/utf8/-/utf8-1.13.2.tgz", + "integrity": "sha512-3NQWGjKTASY1xV5m7Hr0iPeXD9+RDobLll3T9d2AO+g3my8xy5peVyjSag4I50mR1bBSN/Ct12lo+R9tJk0NZQ==", + "license": "MIT" + }, + "node_modules/@webassemblyjs/wasm-edit": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-edit/-/wasm-edit-1.14.1.tgz", + "integrity": "sha512-RNJUIQH/J8iA/1NzlE4N7KtyZNHi3w7at7hDjvRNm5rcUXa00z1vRz3glZoULfJ5mpvYhLybmVcwcjGrC1pRrQ==", + "license": "MIT", + "dependencies": { + "@webassemblyjs/ast": "1.14.1", + "@webassemblyjs/helper-buffer": "1.14.1", + "@webassemblyjs/helper-wasm-bytecode": "1.13.2", + "@webassemblyjs/helper-wasm-section": "1.14.1", + "@webassemblyjs/wasm-gen": "1.14.1", + "@webassemblyjs/wasm-opt": "1.14.1", + "@webassemblyjs/wasm-parser": "1.14.1", + "@webassemblyjs/wast-printer": "1.14.1" + } + }, + "node_modules/@webassemblyjs/wasm-gen": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-gen/-/wasm-gen-1.14.1.tgz", + "integrity": 
"sha512-AmomSIjP8ZbfGQhumkNvgC33AY7qtMCXnN6bL2u2Js4gVCg8fp735aEiMSBbDR7UQIj90n4wKAFUSEd0QN2Ukg==", + "license": "MIT", + "dependencies": { + "@webassemblyjs/ast": "1.14.1", + "@webassemblyjs/helper-wasm-bytecode": "1.13.2", + "@webassemblyjs/ieee754": "1.13.2", + "@webassemblyjs/leb128": "1.13.2", + "@webassemblyjs/utf8": "1.13.2" + } + }, + "node_modules/@webassemblyjs/wasm-opt": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-opt/-/wasm-opt-1.14.1.tgz", + "integrity": "sha512-PTcKLUNvBqnY2U6E5bdOQcSM+oVP/PmrDY9NzowJjislEjwP/C4an2303MCVS2Mg9d3AJpIGdUFIQQWbPds0Sw==", + "license": "MIT", + "dependencies": { + "@webassemblyjs/ast": "1.14.1", + "@webassemblyjs/helper-buffer": "1.14.1", + "@webassemblyjs/wasm-gen": "1.14.1", + "@webassemblyjs/wasm-parser": "1.14.1" + } + }, + "node_modules/@webassemblyjs/wasm-parser": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-parser/-/wasm-parser-1.14.1.tgz", + "integrity": "sha512-JLBl+KZ0R5qB7mCnud/yyX08jWFw5MsoalJ1pQ4EdFlgj9VdXKGuENGsiCIjegI1W7p91rUlcB/LB5yRJKNTcQ==", + "license": "MIT", + "dependencies": { + "@webassemblyjs/ast": "1.14.1", + "@webassemblyjs/helper-api-error": "1.13.2", + "@webassemblyjs/helper-wasm-bytecode": "1.13.2", + "@webassemblyjs/ieee754": "1.13.2", + "@webassemblyjs/leb128": "1.13.2", + "@webassemblyjs/utf8": "1.13.2" + } + }, + "node_modules/@webassemblyjs/wast-printer": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wast-printer/-/wast-printer-1.14.1.tgz", + "integrity": "sha512-kPSSXE6De1XOR820C90RIo2ogvZG+c3KiHzqUoO/F34Y2shGzesfqv7o57xrxovZJH/MetF5UjroJ/R/3isoiw==", + "license": "MIT", + "dependencies": { + "@webassemblyjs/ast": "1.14.1", + "@xtuc/long": "4.2.2" + } + }, + "node_modules/@xtuc/ieee754": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@xtuc/ieee754/-/ieee754-1.2.0.tgz", + "integrity": 
"sha512-DX8nKgqcGwsc0eJSqYt5lwP4DH5FlHnmuWWBRy7X0NcaGR0ZtuyeESgMwTYVEtxmsNGY+qit4QYT/MIYTOTPeA==", + "license": "BSD-3-Clause" + }, + "node_modules/@xtuc/long": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/@xtuc/long/-/long-4.2.2.tgz", + "integrity": "sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ==", + "license": "Apache-2.0" + }, + "node_modules/acorn": { + "version": "8.15.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", + "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", + "license": "MIT", + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "license": "MIT", + "peer": true, + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ajv-formats": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-2.1.1.tgz", + "integrity": "sha512-Wx0Kx52hxE7C18hkMEggYlEifqWZtYaRgouJor+WMdPnQyEK13vgEWyVNup7SoeeoLMsr4kf5h6dOW11I15MUA==", + "license": "MIT", + "dependencies": { + "ajv": "^8.0.0" + }, + "peerDependencies": { + "ajv": "^8.0.0" + }, + "peerDependenciesMeta": { + "ajv": { + "optional": true + } + } + }, + "node_modules/ajv-formats/node_modules/ajv": { + "version": "8.17.1", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", + "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", 
+ "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ajv-formats/node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "license": "MIT" + }, + "node_modules/ajv-keywords": { + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.5.2.tgz", + "integrity": "sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==", + "license": "MIT", + "peerDependencies": { + "ajv": "^6.9.1" + } + }, + "node_modules/ast-types": { + "version": "0.16.1", + "resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.16.1.tgz", + "integrity": "sha512-6t10qk83GOG8p0vKmaCr8eiilZwO171AvbROMtvvNiwrTly62t+7XkA8RdIIVbpMhCASAsxgAzdRSwh6nw/5Dg==", + "license": "MIT", + "dependencies": { + "tslib": "^2.0.1" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/baseline-browser-mapping": { + "version": "2.9.19", + "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.9.19.tgz", + "integrity": "sha512-ipDqC8FrAl/76p2SSWKSI+H9tFwm7vYqXQrItCuiVPt26Km0jS+NzSsBWAaBusvSbQcfJG+JitdMm+wZAgTYqg==", + "license": "Apache-2.0", + "bin": { + "baseline-browser-mapping": "dist/cli.js" + } + }, + "node_modules/big.js": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/big.js/-/big.js-5.2.2.tgz", + "integrity": "sha512-vyL2OymJxmarO8gxMr0mhChsO9QGwhynfuu4+MHTAW6czfq9humCB7rKpUjDd9YUiDPU4mzpyupFSvOClAwbmQ==", + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/browserslist": { + "version": "4.28.1", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.1.tgz", 
+ "integrity": "sha512-ZC5Bd0LgJXgwGqUknZY/vkUQ04r8NXnJZ3yYi4vDmSiZmC/pdSN0NbNRPxZpbtO4uAfDUAFffO8IZoM3Gj8IkA==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "peer": true, + "dependencies": { + "baseline-browser-mapping": "^2.9.0", + "caniuse-lite": "^1.0.30001759", + "electron-to-chromium": "^1.5.263", + "node-releases": "^2.0.27", + "update-browserslist-db": "^1.2.0" + }, + "bin": { + "browserslist": "cli.js" + }, + "engines": { + "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" + } + }, + "node_modules/buffer-crc32": { + "version": "0.2.13", + "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", + "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==", + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/buffer-from": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", + "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", + "license": "MIT" + }, + "node_modules/caniuse-lite": { + "version": "1.0.30001769", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001769.tgz", + "integrity": "sha512-BCfFL1sHijQlBGWBMuJyhZUhzo7wer5sVj9hqekB/7xn0Ypy+pER/edCYQm4exbXj4WiySGp40P8UuTh6w1srg==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/caniuse-lite" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "CC-BY-4.0" + }, + "node_modules/chrome-trace-event": { + "version": "1.0.4", + "resolved": 
"https://registry.npmjs.org/chrome-trace-event/-/chrome-trace-event-1.0.4.tgz", + "integrity": "sha512-rNjApaLzuwaOTjCiT8lSDdGN1APCiqkChLMJxJPWLunPAt5fy8xgU9/jNOchV84wfIxrA0lRQB7oCT8jrn/wrQ==", + "license": "MIT", + "engines": { + "node": ">=6.0" + } + }, + "node_modules/commander": { + "version": "2.20.3", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz", + "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==", + "license": "MIT" + }, + "node_modules/cross-spawn": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/css-loader": { + "version": "5.2.7", + "resolved": "https://registry.npmjs.org/css-loader/-/css-loader-5.2.7.tgz", + "integrity": "sha512-Q7mOvpBNBG7YrVGMxRxcBJZFL75o+cH2abNASdibkj/fffYD8qWbInZrD0S9ccI6vZclF3DsHE7njGlLtaHbhg==", + "license": "MIT", + "dependencies": { + "icss-utils": "^5.1.0", + "loader-utils": "^2.0.0", + "postcss": "^8.2.15", + "postcss-modules-extract-imports": "^3.0.0", + "postcss-modules-local-by-default": "^4.0.0", + "postcss-modules-scope": "^3.0.0", + "postcss-modules-values": "^4.0.0", + "postcss-value-parser": "^4.1.0", + "schema-utils": "^3.0.0", + "semver": "^7.3.5" + }, + "engines": { + "node": ">= 10.13.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + }, + "peerDependencies": { + "webpack": "^4.27.0 || ^5.0.0" + } + }, + "node_modules/cssesc": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/cssesc/-/cssesc-3.0.0.tgz", + "integrity": "sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==", + 
"license": "MIT", + "bin": { + "cssesc": "bin/cssesc" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/csstype": { + "version": "3.2.3", + "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz", + "integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/define-lazy-prop": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/define-lazy-prop/-/define-lazy-prop-2.0.0.tgz", + "integrity": "sha512-Ds09qNh8yw3khSjiJjiUInaGX9xlqZDY7JVryGxdxV7NPeuqQfplOpQ66yJFZut3jLa5zOwkXw1g9EI2uKh4Og==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/dotenv": { + "version": "9.0.2", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-9.0.2.tgz", + "integrity": "sha512-I9OvvrHp4pIARv4+x9iuewrWycX6CcZtoAu1XrzPxc5UygMJXJZYmBsynku8IkrJwgypE5DGNjDPmPRhDCptUg==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=10" + } + }, + "node_modules/electron-to-chromium": { + "version": "1.5.286", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.286.tgz", + "integrity": "sha512-9tfDXhJ4RKFNerfjdCcZfufu49vg620741MNs26a9+bhLThdB+plgMeou98CAaHu/WATj2iHOOHTp1hWtABj2A==", + "license": "ISC" + }, + "node_modules/emojis-list": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/emojis-list/-/emojis-list-3.0.0.tgz", + "integrity": "sha512-/kyM18EfinwXZbno9FyUGeFh87KC8HRQBQGildHZbEuRyWFOmv1U10o9BBp8XVZDVNNuQKyIGIu5ZYAAXJ0V2Q==", + "license": "MIT", + "engines": { + 
"node": ">= 4" + } + }, + "node_modules/end-of-stream": { + "version": "1.4.5", + "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz", + "integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==", + "license": "MIT", + "dependencies": { + "once": "^1.4.0" + } + }, + "node_modules/enhanced-resolve": { + "version": "5.19.0", + "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.19.0.tgz", + "integrity": "sha512-phv3E1Xl4tQOShqSte26C7Fl84EwUdZsyOuSSk9qtAGyyQs2s3jJzComh+Abf4g187lUUAvH+H26omrqia2aGg==", + "license": "MIT", + "dependencies": { + "graceful-fs": "^4.2.4", + "tapable": "^2.3.0" + }, + "engines": { + "node": ">=10.13.0" + } + }, + "node_modules/es-module-lexer": { + "version": "1.7.0", + "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.7.0.tgz", + "integrity": "sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==", + "license": "MIT" + }, + "node_modules/esbuild": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.0.tgz", + "integrity": "sha512-BXq5mqc8ltbaN34cDqWuYKyNhX8D/Z0J1xdtdQ8UcIIIyJyz+ZMKUt58tF3SrZ85jcfN/PZYhjR5uDQAYNVbuw==", + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.25.0", + "@esbuild/android-arm": "0.25.0", + "@esbuild/android-arm64": "0.25.0", + "@esbuild/android-x64": "0.25.0", + "@esbuild/darwin-arm64": "0.25.0", + "@esbuild/darwin-x64": "0.25.0", + "@esbuild/freebsd-arm64": "0.25.0", + "@esbuild/freebsd-x64": "0.25.0", + "@esbuild/linux-arm": "0.25.0", + "@esbuild/linux-arm64": "0.25.0", + "@esbuild/linux-ia32": "0.25.0", + "@esbuild/linux-loong64": "0.25.0", + "@esbuild/linux-mips64el": "0.25.0", + "@esbuild/linux-ppc64": "0.25.0", + "@esbuild/linux-riscv64": "0.25.0", + 
"@esbuild/linux-s390x": "0.25.0", + "@esbuild/linux-x64": "0.25.0", + "@esbuild/netbsd-arm64": "0.25.0", + "@esbuild/netbsd-x64": "0.25.0", + "@esbuild/openbsd-arm64": "0.25.0", + "@esbuild/openbsd-x64": "0.25.0", + "@esbuild/sunos-x64": "0.25.0", + "@esbuild/win32-arm64": "0.25.0", + "@esbuild/win32-ia32": "0.25.0", + "@esbuild/win32-x64": "0.25.0" + } + }, + "node_modules/escalade": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/eslint-scope": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-5.1.1.tgz", + "integrity": "sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==", + "license": "BSD-2-Clause", + "dependencies": { + "esrecurse": "^4.3.0", + "estraverse": "^4.1.1" + }, + "engines": { + "node": ">=8.0.0" + } + }, + "node_modules/esprima": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", + "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", + "license": "BSD-2-Clause", + "bin": { + "esparse": "bin/esparse.js", + "esvalidate": "bin/esvalidate.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/esrecurse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz", + "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", + "license": "BSD-2-Clause", + "dependencies": { + "estraverse": "^5.2.0" + }, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/esrecurse/node_modules/estraverse": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", + "integrity": 
"sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=4.0" + } + }, + "node_modules/estraverse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz", + "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=4.0" + } + }, + "node_modules/events": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/events/-/events-3.3.0.tgz", + "integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==", + "license": "MIT", + "engines": { + "node": ">=0.8.x" + } + }, + "node_modules/execa": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/execa/-/execa-5.1.1.tgz", + "integrity": "sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==", + "license": "MIT", + "dependencies": { + "cross-spawn": "^7.0.3", + "get-stream": "^6.0.0", + "human-signals": "^2.1.0", + "is-stream": "^2.0.0", + "merge-stream": "^2.0.0", + "npm-run-path": "^4.0.1", + "onetime": "^5.1.2", + "signal-exit": "^3.0.3", + "strip-final-newline": "^2.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sindresorhus/execa?sponsor=1" + } + }, + "node_modules/extract-zip": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz", + "integrity": "sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==", + "license": "BSD-2-Clause", + "dependencies": { + "debug": "^4.1.1", + "get-stream": "^5.1.0", + "yauzl": "^2.10.0" + }, + "bin": { + "extract-zip": "cli.js" + }, + "engines": { + "node": ">= 10.17.0" + }, + "optionalDependencies": { + "@types/yauzl": "^2.9.1" + } + }, + "node_modules/extract-zip/node_modules/get-stream": { 
+ "version": "5.2.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz", + "integrity": "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==", + "license": "MIT", + "dependencies": { + "pump": "^3.0.0" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "license": "MIT" + }, + "node_modules/fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", + "license": "MIT" + }, + "node_modules/fast-uri": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz", + "integrity": "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "BSD-3-Clause" + }, + "node_modules/fd-slicer": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz", + "integrity": "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==", + "license": "MIT", + "dependencies": { + "pend": "~1.2.0" + } + }, + "node_modules/fs-monkey": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/fs-monkey/-/fs-monkey-1.0.3.tgz", + "integrity": "sha512-cybjIfiiE+pTWicSCLFHSrXZ6EilF30oh91FDP9S2B051prEa7QWfrVTQm10/dDpswBDXZugPa1Ogu8Yh+HV0Q==", + 
"license": "Unlicense" + }, + "node_modules/get-stream": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz", + "integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/glob-to-regexp": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/glob-to-regexp/-/glob-to-regexp-0.4.1.tgz", + "integrity": "sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw==", + "license": "BSD-2-Clause" + }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", + "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", + "license": "ISC" + }, + "node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/human-signals": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/human-signals/-/human-signals-2.1.0.tgz", + "integrity": "sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==", + "license": "Apache-2.0", + "engines": { + "node": ">=10.17.0" + } + }, + "node_modules/icss-utils": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/icss-utils/-/icss-utils-5.1.0.tgz", + "integrity": "sha512-soFhflCVWLfRNOPU3iv5Z9VUdT44xFRbzjLsEzSr5AQmgqPMTHdU3PMT1Cf1ssx8fLNJDA1juftYl+PUcv3MqA==", + "license": "ISC", + "engines": { + "node": "^10 || ^12 || >= 14" + }, + "peerDependencies": { + "postcss": "^8.1.0" + } + }, + "node_modules/is-docker": { + 
"version": "2.2.1", + "resolved": "https://registry.npmjs.org/is-docker/-/is-docker-2.2.1.tgz", + "integrity": "sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ==", + "license": "MIT", + "bin": { + "is-docker": "cli.js" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-stream": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-wsl": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/is-wsl/-/is-wsl-2.2.0.tgz", + "integrity": "sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww==", + "license": "MIT", + "dependencies": { + "is-docker": "^2.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "license": "ISC" + }, + "node_modules/jest-worker": { + "version": "27.5.1", + "resolved": "https://registry.npmjs.org/jest-worker/-/jest-worker-27.5.1.tgz", + "integrity": "sha512-7vuh85V5cdDofPyxn58nrPjBktZo0u9x1g8WtjQol+jZDaE+fhN+cIvTj11GndBnMnyfrUOG1sZQxCdjKh+DKg==", + "license": "MIT", + "dependencies": { + "@types/node": "*", + "merge-stream": "^2.0.0", + "supports-color": "^8.0.0" + }, + "engines": { + "node": ">= 10.13.0" + } + }, + "node_modules/json-parse-even-better-errors": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", + "integrity": 
"sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", + "license": "MIT" + }, + "node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", + "license": "MIT" + }, + "node_modules/json5": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", + "license": "MIT", + "bin": { + "json5": "lib/cli.js" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/kleur": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/kleur/-/kleur-3.0.3.tgz", + "integrity": "sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/loader-runner": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/loader-runner/-/loader-runner-4.3.1.tgz", + "integrity": "sha512-IWqP2SCPhyVFTBtRcgMHdzlf9ul25NwaFx4wCEH/KjAXuuHY4yNjvPXsBokp8jCB936PyWRaPKUNh8NvylLp2Q==", + "license": "MIT", + "engines": { + "node": ">=6.11.5" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + } + }, + "node_modules/loader-utils": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-2.0.4.tgz", + "integrity": "sha512-xXqpXoINfFhgua9xiqD8fPFHgkoq1mmmpE92WlDbm9rNRd/EbRb+Gqf908T2DMfuHjjJlksiK2RbHVOdD/MqSw==", + "license": "MIT", + "dependencies": { + "big.js": "^5.2.2", + "emojis-list": "^3.0.0", + "json5": "^2.1.2" + }, + "engines": { + "node": ">=8.9.0" + } + }, + "node_modules/lodash.sortby": { + "version": "4.7.0", + "resolved": 
"https://registry.npmjs.org/lodash.sortby/-/lodash.sortby-4.7.0.tgz", + "integrity": "sha512-HDWXG8isMntAyRF5vZ7xKuEvOhT4AhlRt/3czTSjvGUxjYCBVRQY48ViDHyfYz9VIoBkW4TMGQNapx+l3RUwdA==", + "license": "MIT" + }, + "node_modules/lru-cache": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", + "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", + "license": "ISC", + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/mediabunny": { + "version": "1.29.0", + "resolved": "https://registry.npmjs.org/mediabunny/-/mediabunny-1.29.0.tgz", + "integrity": "sha512-18B8w/rhO/ph/AFsIXvzZg8RaSQZ+ZYfJ99MZlTjDmlgCT58jV3azrnWQ/OSquYDi8q0xmn64mnfTEHgww3+zw==", + "license": "MPL-2.0", + "workspaces": [ + "packages/*" + ], + "dependencies": { + "@types/dom-mediacapture-transform": "^0.1.11", + "@types/dom-webcodecs": "0.1.13" + }, + "funding": { + "type": "individual", + "url": "https://github.com/sponsors/Vanilagy" + } + }, + "node_modules/memfs": { + "version": "3.4.3", + "resolved": "https://registry.npmjs.org/memfs/-/memfs-3.4.3.tgz", + "integrity": "sha512-eivjfi7Ahr6eQTn44nvTnR60e4a1Fs1Via2kCR5lHo/kyNoiMWaXCNJ/GpSd0ilXas2JSOl9B5FTIhflXu0hlg==", + "license": "Unlicense", + "dependencies": { + "fs-monkey": "1.0.3" + }, + "engines": { + "node": ">= 4.0.0" + } + }, + "node_modules/merge-stream": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz", + "integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==", + "license": "MIT" + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + 
"node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mimic-fn": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", + "integrity": "sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/minimist": { + "version": "1.2.6", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz", + "integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==", + "license": "MIT" + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/nanoid": { + "version": "3.3.11", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", + "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "bin": { + "nanoid": "bin/nanoid.cjs" + }, + "engines": { + "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" + } + }, + "node_modules/neo-async": { + "version": "2.6.2", + "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz", + "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==", + "license": "MIT" + }, + "node_modules/node-releases": { + "version": "2.0.27", + "resolved": 
"https://registry.npmjs.org/node-releases/-/node-releases-2.0.27.tgz", + "integrity": "sha512-nmh3lCkYZ3grZvqcCH+fjmQ7X+H0OeZgP40OierEaAptX4XofMh5kwNbWh7lBduUzCcV/8kZ+NDLCwm2iorIlA==", + "license": "MIT" + }, + "node_modules/npm-run-path": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/npm-run-path/-/npm-run-path-4.0.1.tgz", + "integrity": "sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==", + "license": "MIT", + "dependencies": { + "path-key": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "license": "ISC", + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/onetime": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz", + "integrity": "sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==", + "license": "MIT", + "dependencies": { + "mimic-fn": "^2.1.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/open": { + "version": "8.4.2", + "resolved": "https://registry.npmjs.org/open/-/open-8.4.2.tgz", + "integrity": "sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==", + "license": "MIT", + "dependencies": { + "define-lazy-prop": "^2.0.0", + "is-docker": "^2.1.1", + "is-wsl": "^2.2.0" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "license": "MIT", + 
"engines": { + "node": ">=8" + } + }, + "node_modules/pend": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz", + "integrity": "sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==", + "license": "MIT" + }, + "node_modules/picocolors": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "license": "ISC" + }, + "node_modules/postcss": { + "version": "8.5.6", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz", + "integrity": "sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/postcss" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "peer": true, + "dependencies": { + "nanoid": "^3.3.11", + "picocolors": "^1.1.1", + "source-map-js": "^1.2.1" + }, + "engines": { + "node": "^10 || ^12 || >=14" + } + }, + "node_modules/postcss-modules-extract-imports": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/postcss-modules-extract-imports/-/postcss-modules-extract-imports-3.1.0.tgz", + "integrity": "sha512-k3kNe0aNFQDAZGbin48pL2VNidTF0w4/eASDsxlyspobzU3wZQLOGj7L9gfRe0Jo9/4uud09DsjFNH7winGv8Q==", + "license": "ISC", + "engines": { + "node": "^10 || ^12 || >= 14" + }, + "peerDependencies": { + "postcss": "^8.1.0" + } + }, + "node_modules/postcss-modules-local-by-default": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/postcss-modules-local-by-default/-/postcss-modules-local-by-default-4.2.0.tgz", + "integrity": "sha512-5kcJm/zk+GJDSfw+V/42fJ5fhjL5YbFDl8nVdXkJPLLW+Vf9mTD5Xe0wqIaDnLuL2U6cDNpTr+UQ+v2HWIBhzw==", + 
"license": "MIT", + "dependencies": { + "icss-utils": "^5.0.0", + "postcss-selector-parser": "^7.0.0", + "postcss-value-parser": "^4.1.0" + }, + "engines": { + "node": "^10 || ^12 || >= 14" + }, + "peerDependencies": { + "postcss": "^8.1.0" + } + }, + "node_modules/postcss-modules-scope": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/postcss-modules-scope/-/postcss-modules-scope-3.2.1.tgz", + "integrity": "sha512-m9jZstCVaqGjTAuny8MdgE88scJnCiQSlSrOWcTQgM2t32UBe+MUmFSO5t7VMSfAf/FJKImAxBav8ooCHJXCJA==", + "license": "ISC", + "dependencies": { + "postcss-selector-parser": "^7.0.0" + }, + "engines": { + "node": "^10 || ^12 || >= 14" + }, + "peerDependencies": { + "postcss": "^8.1.0" + } + }, + "node_modules/postcss-modules-values": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/postcss-modules-values/-/postcss-modules-values-4.0.0.tgz", + "integrity": "sha512-RDxHkAiEGI78gS2ofyvCsu7iycRv7oqw5xMWn9iMoR0N/7mf9D50ecQqUo5BZ9Zh2vH4bCUR/ktCqbB9m8vJjQ==", + "license": "ISC", + "dependencies": { + "icss-utils": "^5.0.0" + }, + "engines": { + "node": "^10 || ^12 || >= 14" + }, + "peerDependencies": { + "postcss": "^8.1.0" + } + }, + "node_modules/postcss-selector-parser": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-7.1.1.tgz", + "integrity": "sha512-orRsuYpJVw8LdAwqqLykBj9ecS5/cRHlI5+nvTo8LcCKmzDmqVORXtOIYEEQuL9D4BxtA1lm5isAqzQZCoQ6Eg==", + "license": "MIT", + "dependencies": { + "cssesc": "^3.0.0", + "util-deprecate": "^1.0.2" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/postcss-value-parser": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz", + "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==", + "license": "MIT" + }, + "node_modules/prompts": { + "version": "2.4.2", + "resolved": 
"https://registry.npmjs.org/prompts/-/prompts-2.4.2.tgz", + "integrity": "sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==", + "license": "MIT", + "dependencies": { + "kleur": "^3.0.3", + "sisteransi": "^1.0.5" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/pump": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.3.tgz", + "integrity": "sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA==", + "license": "MIT", + "dependencies": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, + "node_modules/punycode": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", + "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/randombytes": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/randombytes/-/randombytes-2.1.0.tgz", + "integrity": "sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ==", + "license": "MIT", + "dependencies": { + "safe-buffer": "^5.1.0" + } + }, + "node_modules/react": { + "version": "19.2.4", + "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", + "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", + "license": "MIT", + "peer": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/react-dom": { + "version": "19.2.4", + "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.4.tgz", + "integrity": "sha512-AXJdLo8kgMbimY95O2aKQqsz2iWi9jMgKJhRBAxECE4IFxfcazB2LmzloIoibJI3C12IlY20+KFaLv+71bUJeQ==", + "license": "MIT", + "peer": true, + "dependencies": { + "scheduler": "^0.27.0" + }, + "peerDependencies": { + "react": "^19.2.4" + } + }, + "node_modules/react-refresh": { + "version": 
"0.9.0", + "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.9.0.tgz", + "integrity": "sha512-Gvzk7OZpiqKSkxsQvO/mbTN1poglhmAV7gR/DdIrRrSMXraRQQlfikRJOr3Nb9GTMPC5kof948Zy6jJZIFtDvQ==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/recast": { + "version": "0.23.11", + "resolved": "https://registry.npmjs.org/recast/-/recast-0.23.11.tgz", + "integrity": "sha512-YTUo+Flmw4ZXiWfQKGcwwc11KnoRAYgzAE2E7mXKCjSviTKShtxBsN6YUUBB2gtaBzKzeKunxhUwNHQuRryhWA==", + "license": "MIT", + "dependencies": { + "ast-types": "^0.16.1", + "esprima": "~4.0.0", + "source-map": "~0.6.1", + "tiny-invariant": "^1.3.3", + "tslib": "^2.0.1" + }, + "engines": { + "node": ">= 4" + } + }, + "node_modules/recast/node_modules/source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/remotion": { + "version": "4.0.420", + "resolved": "https://registry.npmjs.org/remotion/-/remotion-4.0.420.tgz", + "integrity": "sha512-PmFYbYWCVmi8qaDphpeYPU7/SDADjTPtieEvW5VQEQ6SQP3Ntw37Dvr/Y0pm4gOU32Iw7WwT9x96UjCxdZ3d5Q==", + "license": "SEE LICENSE IN LICENSE.md", + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": 
"sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/scheduler": { + "version": "0.27.0", + "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.27.0.tgz", + "integrity": "sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==", + "license": "MIT" + }, + "node_modules/schema-utils": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-3.3.0.tgz", + "integrity": "sha512-pN/yOAvcC+5rQ5nERGuwrjLlYvLTbCibnZ1I7B1LaiAz9BRBlE9GMgE/eqV30P7aJQUf7Ddimy/RsbYO/GrVGg==", + "license": "MIT", + "dependencies": { + "@types/json-schema": "^7.0.8", + "ajv": "^6.12.5", + "ajv-keywords": "^3.5.2" + }, + "engines": { + "node": ">= 10.13.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + } + }, + "node_modules/semver": { + "version": "7.5.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.3.tgz", + "integrity": "sha512-QBlUtyVk/5EeHbi7X0fw6liDZc7BBmEaSYn01fMU1OUYbf6GPsbTtd8WmnqbI20SeycoHSeiybkE/q1Q+qlThQ==", + "license": "ISC", + "dependencies": { + "lru-cache": "^6.0.0" + }, + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/serialize-javascript": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.2.tgz", + "integrity": "sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==", + "license": "BSD-3-Clause", + "dependencies": { + "randombytes": "^2.1.0" + } + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": 
"https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "license": "MIT", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/signal-exit": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", + "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==", + "license": "ISC" + }, + "node_modules/sisteransi": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/sisteransi/-/sisteransi-1.0.5.tgz", + "integrity": "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==", + "license": "MIT" + }, + "node_modules/source-map": { + "version": "0.7.3", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.7.3.tgz", + "integrity": "sha512-CkCj6giN3S+n9qrYiBTX5gystlENnRW5jZeNLHpe6aue+SrHcG5VYwujhW9s4dY31mEGsxBDrHR6oI69fTXsaQ==", + "license": "BSD-3-Clause", + "engines": { + "node": ">= 8" + } + }, + "node_modules/source-map-js": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", + "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/source-map-support": { + "version": "0.5.21", + "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.21.tgz", + "integrity": 
"sha512-uBHU3L3czsIyYXKX88fdrGovxdSCoTGDRZ6SYXtSRxLZUzHg5P/66Ht6uoUlHu9EZod+inXhKo3qQgwXUT/y1w==", + "license": "MIT", + "dependencies": { + "buffer-from": "^1.0.0", + "source-map": "^0.6.0" + } + }, + "node_modules/source-map-support/node_modules/source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/strip-final-newline": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/strip-final-newline/-/strip-final-newline-2.0.0.tgz", + "integrity": "sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/style-loader": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/style-loader/-/style-loader-4.0.0.tgz", + "integrity": "sha512-1V4WqhhZZgjVAVJyt7TdDPZoPBPNHbekX4fWnCJL1yQukhCeZhJySUL+gL9y6sNdN95uEOS83Y55SqHcP7MzLA==", + "license": "MIT", + "engines": { + "node": ">= 18.12.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + }, + "peerDependencies": { + "webpack": "^5.27.0" + } + }, + "node_modules/supports-color": { + "version": "8.1.1", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz", + "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==", + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/supports-color?sponsor=1" + } + }, + "node_modules/tapable": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.3.0.tgz", + "integrity": 
"sha512-g9ljZiwki/LfxmQADO3dEY1CbpmXT5Hm2fJ+QaGKwSXUylMybePR7/67YW7jOrrvjEgL1Fmz5kzyAjWVWLlucg==", + "license": "MIT", + "engines": { + "node": ">=6" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + } + }, + "node_modules/terser": { + "version": "5.46.0", + "resolved": "https://registry.npmjs.org/terser/-/terser-5.46.0.tgz", + "integrity": "sha512-jTwoImyr/QbOWFFso3YoU3ik0jBBDJ6JTOQiy/J2YxVJdZCc+5u7skhNwiOR3FQIygFqVUPHl7qbbxtjW2K3Qg==", + "license": "BSD-2-Clause", + "dependencies": { + "@jridgewell/source-map": "^0.3.3", + "acorn": "^8.15.0", + "commander": "^2.20.0", + "source-map-support": "~0.5.20" + }, + "bin": { + "terser": "bin/terser" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/terser-webpack-plugin": { + "version": "5.3.16", + "resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-5.3.16.tgz", + "integrity": "sha512-h9oBFCWrq78NyWWVcSwZarJkZ01c2AyGrzs1crmHZO3QUg9D61Wu4NPjBy69n7JqylFF5y+CsUZYmYEIZ3mR+Q==", + "license": "MIT", + "dependencies": { + "@jridgewell/trace-mapping": "^0.3.25", + "jest-worker": "^27.4.5", + "schema-utils": "^4.3.0", + "serialize-javascript": "^6.0.2", + "terser": "^5.31.1" + }, + "engines": { + "node": ">= 10.13.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + }, + "peerDependencies": { + "webpack": "^5.1.0" + }, + "peerDependenciesMeta": { + "@swc/core": { + "optional": true + }, + "esbuild": { + "optional": true + }, + "uglify-js": { + "optional": true + } + } + }, + "node_modules/terser-webpack-plugin/node_modules/ajv": { + "version": "8.17.1", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", + "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "license": "MIT", + "peer": true, + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + 
"require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/terser-webpack-plugin/node_modules/ajv-keywords": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-5.1.0.tgz", + "integrity": "sha512-YCS/JNFAUyr5vAuhk1DWm1CBxRHW9LbJ2ozWeemrIqpbsqKjHVxYPyi5GC0rjZIT5JxJ3virVTS8wk4i/Z+krw==", + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3" + }, + "peerDependencies": { + "ajv": "^8.8.2" + } + }, + "node_modules/terser-webpack-plugin/node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "license": "MIT" + }, + "node_modules/terser-webpack-plugin/node_modules/schema-utils": { + "version": "4.3.3", + "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-4.3.3.tgz", + "integrity": "sha512-eflK8wEtyOE6+hsaRVPxvUKYCpRgzLqDTb8krvAsRIwOGlHoSgYLgBXoubGgLd2fT41/OUYdb48v4k4WWHQurA==", + "license": "MIT", + "dependencies": { + "@types/json-schema": "^7.0.9", + "ajv": "^8.9.0", + "ajv-formats": "^2.1.1", + "ajv-keywords": "^5.1.0" + }, + "engines": { + "node": ">= 10.13.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + } + }, + "node_modules/tiny-invariant": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz", + "integrity": "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==", + "license": "MIT" + }, + "node_modules/tr46": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-1.0.1.tgz", + "integrity": "sha512-dTpowEjclQ7Kgx5SdBkqRzVhERQXov8/l9Ft9dVM9fmg0W0KQSVaXX9T4i6twCPNtYiZM53lpSSUAwJbFPOHxA==", + "license": "MIT", + 
"dependencies": { + "punycode": "^2.1.0" + } + }, + "node_modules/tslib": { + "version": "2.8.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", + "license": "0BSD" + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "7.16.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", + "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==", + "license": "MIT" + }, + "node_modules/update-browserslist-db": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.2.3.tgz", + "integrity": "sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "escalade": "^3.2.0", + "picocolors": "^1.1.1" + }, + "bin": { + "update-browserslist-db": "cli.js" + }, + "peerDependencies": { + "browserslist": ">= 4.21.0" + } + }, + "node_modules/uri-js": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", + "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "license": 
"BSD-2-Clause", + "dependencies": { + "punycode": "^2.1.0" + } + }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "license": "MIT" + }, + "node_modules/watchpack": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-2.5.1.tgz", + "integrity": "sha512-Zn5uXdcFNIA1+1Ei5McRd+iRzfhENPCe7LeABkJtNulSxjma+l7ltNx55BWZkRlwRnpOgHqxnjyaDgJnNXnqzg==", + "license": "MIT", + "dependencies": { + "glob-to-regexp": "^0.4.1", + "graceful-fs": "^4.1.2" + }, + "engines": { + "node": ">=10.13.0" + } + }, + "node_modules/webidl-conversions": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-4.0.2.tgz", + "integrity": "sha512-YQ+BmxuTgd6UXZW3+ICGfyqRyHXVlD5GtQr5+qjiNW7bF0cqrzX500HVXPBOvgXb5YnzDd+h0zqyv61KUD7+Sg==", + "license": "BSD-2-Clause" + }, + "node_modules/webpack": { + "version": "5.96.1", + "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.96.1.tgz", + "integrity": "sha512-l2LlBSvVZGhL4ZrPwyr8+37AunkcYj5qh8o6u2/2rzoPc8gxFJkLj1WxNgooi9pnoc06jh0BjuXnamM4qlujZA==", + "license": "MIT", + "peer": true, + "dependencies": { + "@types/eslint-scope": "^3.7.7", + "@types/estree": "^1.0.6", + "@webassemblyjs/ast": "^1.12.1", + "@webassemblyjs/wasm-edit": "^1.12.1", + "@webassemblyjs/wasm-parser": "^1.12.1", + "acorn": "^8.14.0", + "browserslist": "^4.24.0", + "chrome-trace-event": "^1.0.2", + "enhanced-resolve": "^5.17.1", + "es-module-lexer": "^1.2.1", + "eslint-scope": "5.1.1", + "events": "^3.2.0", + "glob-to-regexp": "^0.4.1", + "graceful-fs": "^4.2.11", + "json-parse-even-better-errors": "^2.3.1", + "loader-runner": "^4.2.0", + "mime-types": "^2.1.27", + "neo-async": "^2.6.2", + "schema-utils": "^3.2.0", + "tapable": "^2.1.1", + "terser-webpack-plugin": "^5.3.10", + 
"watchpack": "^2.4.1", + "webpack-sources": "^3.2.3" + }, + "bin": { + "webpack": "bin/webpack.js" + }, + "engines": { + "node": ">=10.13.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + }, + "peerDependenciesMeta": { + "webpack-cli": { + "optional": true + } + } + }, + "node_modules/webpack-sources": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/webpack-sources/-/webpack-sources-3.3.3.tgz", + "integrity": "sha512-yd1RBzSGanHkitROoPFd6qsrxt+oFhg/129YzheDGqeustzX0vTZJZsSsQjVQC4yzBQ56K55XU8gaNCtIzOnTg==", + "license": "MIT", + "engines": { + "node": ">=10.13.0" + } + }, + "node_modules/whatwg-url": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-7.1.0.tgz", + "integrity": "sha512-WUu7Rg1DroM7oQvGWfOiAK21n74Gg+T4elXEQYkOhtyLeWiJFoOGLXPKI/9gzIie9CtwVLm8wtw6YJdKyxSjeg==", + "license": "MIT", + "dependencies": { + "lodash.sortby": "^4.7.0", + "tr46": "^1.0.1", + "webidl-conversions": "^4.0.2" + } + }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "license": "ISC", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "license": "ISC" + }, + "node_modules/ws": { + "version": "8.17.1", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.17.1.tgz", + "integrity": "sha512-6XQFvXTkbfUOZOKKILFG1PDK2NDQs4azKQl26T0YS5CxqWLgXajbPZ+h4gZekJyRqFU8pvnbAbbs/3TgRPy+GQ==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + 
"utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, + "node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "license": "ISC" + }, + "node_modules/yauzl": { + "version": "2.10.0", + "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz", + "integrity": "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==", + "license": "MIT", + "dependencies": { + "buffer-crc32": "~0.2.3", + "fd-slicer": "~1.1.0" + } + }, + "node_modules/zod": { + "version": "3.22.3", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.22.3.tgz", + "integrity": "sha512-EjIevzuJRiRPbVH4mGc8nApb/lVLKVpmUhAaR5R5doKGfAnGJ6Gr3CViAVjP+4FWSxCsybeWQdcgCtbX+7oZug==", + "license": "MIT", + "peer": true, + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + } + } +} diff --git a/promo/package.json b/promo/package.json new file mode 100644 index 0000000..9ce67d0 --- /dev/null +++ b/promo/package.json @@ -0,0 +1,23 @@ +{ + "name": "kitedb-promo", + "version": "1.0.0", + "description": "KiteDB promo video", + "type": "module", + "scripts": { + "dev": "remotion studio", + "build": "remotion render KiteDBPromo out/promo.mp4", + "preview": "remotion preview" + }, + "dependencies": { + "@remotion/bundler": "^4.0.420", + "@remotion/cli": "^4.0.420", + "react": "^19.2.4", + "react-dom": "^19.2.4", + "remotion": "^4.0.420" + }, + "devDependencies": { + "@types/react": "^19.2.13", + "@types/react-dom": "^19.2.3", + "typescript": "^5.9.3" + } +} diff --git a/promo/remotion.config.ts b/promo/remotion.config.ts new file mode 100644 index 0000000..e8d4dba --- /dev/null +++ b/promo/remotion.config.ts @@ -0,0 +1,4 @@ +import { Config } from "@remotion/cli/config"; + 
+Config.setVideoImageFormat("jpeg"); +Config.setOverwriteOutput(true); diff --git a/promo/src/KiteDBPromo.tsx b/promo/src/KiteDBPromo.tsx new file mode 100644 index 0000000..a839317 --- /dev/null +++ b/promo/src/KiteDBPromo.tsx @@ -0,0 +1,1224 @@ +import { + AbsoluteFill, + interpolate, + Sequence, + spring, + useCurrentFrame, + useVideoConfig, + Easing, +} from "remotion"; +import { KiteLogo } from "./KiteLogo"; +import { theme } from "./theme"; + +// ============================================================================ +// SHARED COMPONENTS +// ============================================================================ + +// Background with grid and glow - persistent across all scenes +const Background: React.FC = () => { + const frame = useCurrentFrame(); + const gridOffset = frame * 0.3; + + return ( + + {/* Animated Grid */} +
+ + {/* Speed lines */} +
+ + ); +}; + +// Blinking cursor component +const Cursor: React.FC<{ frame: number; visible?: boolean }> = ({ + frame, + visible = true, +}) => { + if (!visible) return null; + const opacity = Math.sin(frame * 0.2) > 0 ? 1 : 0; + return ( + + ); +}; + +// Terminal window wrapper +const Terminal: React.FC<{ + title: string; + children: React.ReactNode; + width?: number; + opacity?: number; + scale?: number; + glow?: boolean; +}> = ({ title, children, width = 800, opacity = 1, scale = 1, glow = false }) => { + return ( +
+ {/* Terminal header */} +
+
+
+
+ + {title} + +
+ + {/* Terminal content */} +
{children}
+
+ ); +}; + +// Hero text - clean solid style +const HeroText: React.FC<{ + children: string; + delay?: number; + fontSize?: number; + subtle?: boolean; +}> = ({ children, delay = 0, fontSize = 64, subtle = false }) => { + const frame = useCurrentFrame(); + const { fps } = useVideoConfig(); + + const progress = spring({ + frame: frame - delay, + fps, + config: { damping: 20, stiffness: 100 }, + }); + + const opacity = interpolate(progress, [0, 1], [0, 1], { + extrapolateRight: "clamp", + }); + const translateY = interpolate(progress, [0, 1], [30, 0], { + extrapolateRight: "clamp", + }); + + return ( +
+ {children} +
+ ); +}; + +// ============================================================================ +// SCENE 1: INSTANT HOOK (0-3s / 0-90 frames) +// ============================================================================ + +// Title card - static, use as thumbnail +const Scene0_Title: React.FC = () => { + return ( + +
+ KiteDB +
+
+ The fastest graph database +
+
+ ); +}; + +const Scene1_InstantHook: React.FC = () => { + const frame = useCurrentFrame(); + const { fps } = useVideoConfig(); + + // Query text types instantly + const queryText = "db.from(alice).out(Knows).toArray()"; + const typedChars = Math.min( + queryText.length, + Math.floor(interpolate(frame, [8, 25], [0, queryText.length], { + extrapolateLeft: "clamp", + extrapolateRight: "clamp", + easing: Easing.out(Easing.cubic), + })) + ); + + // Result appears FAST + const showResult = frame > 28; + const resultOpacity = interpolate(frame, [28, 35], [0, 1], { + extrapolateLeft: "clamp", + extrapolateRight: "clamp", + }); + + // Terminal entry + const terminalProgress = spring({ + frame: frame - 2, + fps, + config: { damping: 20, stiffness: 120 }, + }); + const terminalOpacity = interpolate(terminalProgress, [0, 1], [0, 1], { + extrapolateRight: "clamp", + }); + const terminalScale = interpolate(terminalProgress, [0, 1], [0.95, 1], { + extrapolateRight: "clamp", + }); + + // Text entry + const textProgress = spring({ + frame: frame - 45, + fps, + config: { damping: 15, stiffness: 80 }, + }); + + return ( + + +
+ + + {queryText.slice(0, typedChars)} + + +
+ + {showResult && ( +
+ {" "} + 5 results in{" "} + + 417ns + +
+ )} +
+ + {/* Hook text */} +
+ Databases shouldn't feel slow. +
+
+ ); +}; + +// ============================================================================ +// SCENE 2: SPEED PROOF (3-8s / 90-240 frames) +// ============================================================================ + +// Benchmark bar component for visual comparison +const BenchmarkBar: React.FC<{ + label: string; + value: string; + rawNs: number; + maxNs: number; + color: string; + delay: number; + isWinner?: boolean; +}> = ({ label, value, rawNs, maxNs, color, delay, isWinner = false }) => { + const frame = useCurrentFrame(); + const { fps } = useVideoConfig(); + + const entryProgress = spring({ + frame: frame - delay, + fps, + config: { damping: 20, stiffness: 100 }, + }); + + const barWidth = interpolate(entryProgress, [0, 1], [0, (rawNs / maxNs) * 100], { + extrapolateRight: "clamp", + }); + + const opacity = interpolate(entryProgress, [0, 0.3], [0, 1], { + extrapolateRight: "clamp", + }); + + const glowPulse = isWinner ? interpolate( + Math.sin((frame - delay) * 0.15), + [-1, 1], + [0.5, 1] + ) : 0; + + return ( +
+
+ + {label} + {isWinner && } + + + {value} + +
+
+
+
+
+ ); +}; + +const Scene2_SpeedProof: React.FC = () => { + const frame = useCurrentFrame(); + const { fps } = useVideoConfig(); + + // Benchmark data: 10K nodes, 20K edges + // KiteDB: p50 708ns, Memgraph: p50 338.17µs + const kitedbNs = 708; + const memgraphNs = 338170; // 338.17µs in ns + const speedup = Math.round(memgraphNs / kitedbNs); + + const headerProgress = spring({ + frame: frame - 5, + fps, + config: { damping: 20, stiffness: 100 }, + }); + + const speedupProgress = spring({ + frame: frame - 70, + fps, + config: { damping: 12, stiffness: 60 }, + }); + + const subtitleProgress = spring({ + frame: frame - 100, + fps, + config: { damping: 15, stiffness: 80 }, + }); + + return ( + + {/* Header with dataset info */} +
+
+ Graph Traversal Benchmark +
+
+ 10K nodes + + 20K edges + + p50 latency +
+
+ + {/* Benchmark comparison */} +
+ + +
+ + {/* Speedup callout */} +
+ + {speedup}x + + + faster + +
+ + {/* Subtitle */} +
+ + Sub-microsecond queries. Zero compromise. + +
+
+ ); +}; + +// ============================================================================ +// SCENE 3: FLUENT QUERY SYNTAX (8-14s / 240-420 frames) +// ============================================================================ + +const Scene3_FluentSyntax: React.FC = () => { + const frame = useCurrentFrame(); + const { fps } = useVideoConfig(); + + const fullCode = `const db = await kite('./social.kitedb') + +// Traverse relationships fluently +const friends = db + .from(alice) + .out(Knows) + .whereNode(n => n.get("active")) + .toArray() // 284ns + +// Find shortest path +const path = db + .shortestPath(alice).to(bob) + .via(Knows) + .dijkstra() // 1.2µs`; + + // Typewriter effect + const typingSpeed = 2; + const typedChars = Math.floor( + interpolate(frame, [15, 15 + fullCode.length / typingSpeed], [0, fullCode.length], { + extrapolateLeft: "clamp", + extrapolateRight: "clamp", + }) + ); + + const displayedCode = fullCode.slice(0, typedChars); + + // Syntax highlighting + const highlightCode = (code: string) => { + const tokens: { text: string; type: string }[] = []; + let remaining = code; + + const keywords = ["const", "await"]; + const functions = ["kite", "from", "out", "where", "toArray", "shortestPath", "to", "via", "dijkstra", "get"]; + const types = ["Knows"]; + const variables = ["db", "friends", "alice", "bob", "path", "n"]; + + while (remaining.length > 0) { + // Comments + const commentMatch = remaining.match(/^\/\/[^\n]*/); + if (commentMatch) { + tokens.push({ text: commentMatch[0], type: "comment" }); + remaining = remaining.slice(commentMatch[0].length); + continue; + } + + // Strings + const stringMatch = remaining.match(/^'[^']*'?/); + if (stringMatch) { + tokens.push({ text: stringMatch[0], type: "string" }); + remaining = remaining.slice(stringMatch[0].length); + continue; + } + + // Arrow function + const arrowMatch = remaining.match(/^=>/); + if (arrowMatch) { + tokens.push({ text: "=>", type: "punctuation" }); + remaining = 
remaining.slice(2); + continue; + } + + // Words + const wordMatch = remaining.match(/^[a-zA-Z_][a-zA-Z0-9_]*/); + if (wordMatch) { + const word = wordMatch[0]; + let type = "default"; + if (keywords.includes(word)) type = "keyword"; + else if (functions.includes(word)) type = "function"; + else if (types.includes(word)) type = "type"; + else if (variables.includes(word)) type = "variable"; + tokens.push({ text: word, type }); + remaining = remaining.slice(word.length); + continue; + } + + tokens.push({ text: remaining[0], type: "punctuation" }); + remaining = remaining.slice(1); + } + + return tokens; + }; + + const tokens = highlightCode(displayedCode); + + const getColor = (type: string) => { + switch (type) { + case "keyword": return theme.codeKeyword; + case "function": return theme.codeFunction; + case "type": return theme.codeType; + case "variable": return theme.codeVariable; + case "string": return theme.codeString; + case "comment": return theme.codeComment; + default: return theme.mutedForeground; + } + }; + + // Terminal entry + const entryProgress = spring({ + frame, + fps, + config: { damping: 15, stiffness: 80 }, + }); + + const opacity = interpolate(entryProgress, [0, 1], [0, 1], { + extrapolateRight: "clamp", + }); + const scale = interpolate(entryProgress, [0, 1], [0.95, 1], { + extrapolateRight: "clamp", + }); + + // Hero text + const textProgress = spring({ + frame: frame - 30, + fps, + config: { damping: 15, stiffness: 80 }, + }); + + return ( + + {/* Code editor */} +
+ +
+            {tokens.map((token, i) => (
+              
+                {token.text}
+              
+            ))}
+            
+          
+
+
+ + {/* Subtitle */} +
+ Queries that read like thought. +
+
+ ); +}; + +// ============================================================================ +// SCENE 4: DEVELOPER FLOW (14-20s / 420-600 frames) +// ============================================================================ + +const FlowSnippet: React.FC<{ + code: string; + result: string; + delay: number; + position: { x: number; y: number }; +}> = ({ code, result, delay, position }) => { + const frame = useCurrentFrame(); + const { fps } = useVideoConfig(); + + const entryProgress = spring({ + frame: frame - delay, + fps, + config: { damping: 20, stiffness: 150 }, + }); + + const opacity = interpolate(entryProgress, [0, 1], [0, 1], { + extrapolateRight: "clamp", + }); + const translateY = interpolate(entryProgress, [0, 1], [30, 0], { + extrapolateRight: "clamp", + }); + + // Result appears after typing + const showResult = frame - delay > 25; + const resultOpacity = interpolate(frame - delay, [25, 35], [0, 1], { + extrapolateLeft: "clamp", + extrapolateRight: "clamp", + }); + + // Exit animation + const exitProgress = interpolate(frame - delay, [50, 60], [0, 1], { + extrapolateLeft: "clamp", + extrapolateRight: "clamp", + }); + const exitOpacity = 1 - exitProgress; + + return ( +
+
+
{code}
+ {showResult && ( +
+ → {result} +
+ )} +
+
+ ); +}; + +const Scene4_DeveloperFlow: React.FC = () => { + const frame = useCurrentFrame(); + const { fps } = useVideoConfig(); + + const snippets = [ + { code: ".whereNode(n => n.age > 25)", result: "847 nodes • 312ns", delay: 0, position: { x: 180, y: 200 } }, + { code: ".out(WorksAt)", result: "3.2K edges • 89ns", delay: 30, position: { x: 600, y: 350 } }, + { code: ".nodes()", result: "312 unique • 47ns", delay: 60, position: { x: 280, y: 500 } }, + { code: ".take(10)", result: "limited • 8ns", delay: 90, position: { x: 720, y: 250 } }, + { code: ".toArray()", result: "done ✓ • 156ns", delay: 120, position: { x: 450, y: 400 } }, + ]; + + // Center text + const textProgress = spring({ + frame: frame - 60, + fps, + config: { damping: 15, stiffness: 80 }, + }); + + return ( + + {/* Rapid snippets flying in */} + {snippets.map((snippet) => ( + + ))} + + {/* Center text */} + +
+ Stay in flow. +
+
+
+ ); +}; + +// ============================================================================ +// SCENE 5: BUILT FOR SPEED (20-25s / 600-750 frames) +// ============================================================================ + +const SpeedParticle: React.FC<{ + startX: number; + startY: number; + speed: number; + delay: number; + length: number; +}> = ({ startX, startY, speed, delay, length }) => { + const frame = useCurrentFrame(); + + const progress = ((frame - delay) * speed) % 2000; + const x = startX + progress; + const opacity = interpolate(progress, [0, 100, 1800, 2000], [0, 0.6, 0.6, 0], { + extrapolateLeft: "clamp", + extrapolateRight: "clamp", + }); + + return ( +
+ ); +}; + +const Scene5_Performance: React.FC = () => { + const frame = useCurrentFrame(); + const { fps } = useVideoConfig(); + + // Generate particles + const particles = Array.from({ length: 20 }, (_, i) => ({ + startX: -200 - (i * 100), + startY: 100 + (i * 45), + speed: 8 + (i % 5) * 2, + delay: i * 3, + length: 80 + (i % 3) * 40, + })); + + // Metrics that fade in + const metrics = [ + { label: "Zero-copy mmap", delay: 15 }, + { label: "CSR adjacency", delay: 28 }, + { label: "MVCC snapshots", delay: 41 }, + { label: "No network hops", delay: 54 }, + { label: "Single file", delay: 67 }, + ]; + + // Text + const textProgress = spring({ + frame: frame - 10, + fps, + config: { damping: 15, stiffness: 80 }, + }); + + return ( + + {/* Speed particles */} + {particles.map((p) => ( + + ))} + + {/* Content */} + +
+ Designed for performance. +
+ + {/* Metrics */} +
+ {metrics.map((metric, i) => { + const metricProgress = spring({ + frame: frame - metric.delay, + fps, + config: { damping: 20, stiffness: 100 }, + }); + const metricOpacity = interpolate(metricProgress, [0, 1], [0, 1], { + extrapolateRight: "clamp", + }); + const metricTranslate = interpolate(metricProgress, [0, 1], [15, 0], { + extrapolateRight: "clamp", + }); + + return ( +
+ {metric.label} +
+ ); + })} +
+
+
+ ); +}; + +// ============================================================================ +// SCENE 6: INSTALLATION + END CARD (25-30s / 750-900 frames) +// ============================================================================ + +const Scene6_EndCard: React.FC = () => { + const frame = useCurrentFrame(); + const { fps } = useVideoConfig(); + + // Terminal typing + const command = "npm install @kitedb/core"; + const typedChars = Math.floor( + interpolate(frame, [20, 50], [0, command.length], { + extrapolateLeft: "clamp", + extrapolateRight: "clamp", + }) + ); + + // Entry animations + const terminalProgress = spring({ + frame: frame - 5, + fps, + config: { damping: 20, stiffness: 100 }, + }); + + const logoProgress = spring({ + frame: frame - 60, + fps, + config: { damping: 15, stiffness: 80 }, + }); + + const taglineProgress = spring({ + frame: frame - 75, + fps, + config: { damping: 15, stiffness: 80 }, + }); + + const urlProgress = spring({ + frame: frame - 90, + fps, + config: { damping: 15, stiffness: 80 }, + }); + + // Show success after typing + const showSuccess = frame > 55; + const successOpacity = interpolate(frame, [55, 60], [0, 1], { + extrapolateLeft: "clamp", + extrapolateRight: "clamp", + }); + + return ( + + {/* Install command */} +
+ +
+ + + {command.slice(0, typedChars)} + + +
+ {showSuccess && ( +
+ {" "} + added 1 package +
+ )} +
+
+ + {/* Tagline */} +
+ Install. Query. Ship. +
+ + {/* Logo */} +
+ +
+ + {/* URL */} +
+ kitedb.vercel.app +
+
+ ); +}; + +// ============================================================================ +// MAIN COMPOSITION +// ============================================================================ + +export const KiteDBPromo: React.FC = () => { + const { fps } = useVideoConfig(); + + return ( + + + + {/* Scene 0: Title (0-250ms / ~8 frames at 30fps) */} + + + + + {/* Scene 1: Hook (250ms-3.25s) */} + + + + + {/* Scene 2: Speed Proof (3.25-8.25s) */} + + + + + {/* Scene 3: Fluent Query Syntax (8.25-14.25s) */} + + + + + {/* Scene 4: Developer Flow (14.25-20.25s) */} + + + + + {/* Scene 5: Built for Speed (20.25-25.25s) */} + + + + + {/* Scene 6: Installation + End Card (25.25-30.25s) */} + + + + + ); +}; diff --git a/promo/src/KiteLogo.tsx b/promo/src/KiteLogo.tsx new file mode 100644 index 0000000..786ed56 --- /dev/null +++ b/promo/src/KiteLogo.tsx @@ -0,0 +1,311 @@ +import { interpolate, spring, useCurrentFrame, useVideoConfig } from "remotion"; + +interface KiteLogoProps { + scale?: number; + showGlow?: boolean; + animateIn?: boolean; + delay?: number; +} + +// Node positions +const CENTER = { x: 108, y: 108 }; +const NODES = [ + { x: 100, y: 20, color: "#06B6D4" }, // top + { x: 175, y: 90, color: "#06B6D4" }, // right + { x: 115, y: 210, color: "#3B82F6" }, // bottom + { x: 35, y: 105, color: "#06B6D4" }, // left +]; + +// Edge paths from center to each node +const EDGES = NODES.map((node) => ({ + from: CENTER, + to: node, +})); + +// Outer edges connecting nodes (clockwise) +const OUTER_EDGES = [ + { from: NODES[0], to: NODES[1] }, // top -> right + { from: NODES[1], to: NODES[2] }, // right -> bottom + { from: NODES[2], to: NODES[3] }, // bottom -> left + { from: NODES[3], to: NODES[0] }, // left -> top +]; + +export const KiteLogo: React.FC = ({ + scale = 1, + showGlow = true, + animateIn = true, + delay = 0, +}) => { + const frame = useCurrentFrame(); + const { fps } = useVideoConfig(); + + const localFrame = frame - delay; + + // Phase 1: Center node appears 
(frames 0-15) + const centerProgress = animateIn + ? spring({ + frame: localFrame, + fps, + config: { damping: 12, stiffness: 200 }, + }) + : 1; + + // Phase 2: Edges grow outward from center (frames 8-35) + const edgeProgress = animateIn + ? interpolate(localFrame, [8, 35], [0, 1], { + extrapolateLeft: "clamp", + extrapolateRight: "clamp", + }) + : 1; + + // Phase 3: Outer nodes pop in sequentially (frames 20-50) + const nodeDelays = [20, 26, 32, 38]; + const nodeProgresses = NODES.map((_, i) => + animateIn + ? spring({ + frame: localFrame - nodeDelays[i], + fps, + config: { damping: 10, stiffness: 300 }, + }) + : 1 + ); + + // Phase 4: Outer edges connect (frames 35-60) + const outerEdgeDelays = [35, 40, 45, 50]; + const outerEdgeProgresses = OUTER_EDGES.map((_, i) => + animateIn + ? interpolate(localFrame, [outerEdgeDelays[i], outerEdgeDelays[i] + 12], [0, 1], { + extrapolateLeft: "clamp", + extrapolateRight: "clamp", + }) + : 1 + ); + + // Traveling pulse effect on edges (continuous after initial animation) + const pulsePhase = localFrame * 0.15; + + // Glow pulse animation + const glowPulse = interpolate( + Math.sin(localFrame * 0.08), + [-1, 1], + [0.15, 0.4] + ); + + // Center ring rotation + const ringRotation = localFrame * 2; + + // Overall fade in + const opacity = animateIn + ? 
interpolate(localFrame, [0, 10], [0, 1], { + extrapolateLeft: "clamp", + extrapolateRight: "clamp", + }) + : 1; + + // Calculate edge path with animated length + const getEdgePath = (from: { x: number; y: number }, to: { x: number; y: number }, progress: number) => { + const currentX = from.x + (to.x - from.x) * progress; + const currentY = from.y + (to.y - from.y) * progress; + return `M${from.x} ${from.y}L${currentX} ${currentY}`; + }; + + // Calculate pulse position along edge + const getPulsePosition = (from: { x: number; y: number }, to: { x: number; y: number }, t: number) => { + const wrapped = ((t % 1) + 1) % 1; + return { + x: from.x + (to.x - from.x) * wrapped, + y: from.y + (to.y - from.y) * wrapped, + }; + }; + + return ( + + KiteDB Logo + {/* Neon Background Glow */} + {showGlow && ( + + )} + + {/* The Kite Fill - fades in after structure complete */} + + + {/* Internal Edges - grow from center */} + + {EDGES.map((edge, i) => ( + 0.5 ? "drop-shadow(0 0 4px #00F0FF)" : "none", + }} + /> + ))} + + + {/* Outer Edges - connect nodes sequentially */} + + {OUTER_EDGES.map((edge, i) => ( + 0.5 ? 
"drop-shadow(0 0 4px #00F0FF)" : "none", + }} + /> + ))} + + + {/* Traveling pulses on edges (only after edges are drawn) */} + {edgeProgress >= 1 && EDGES.map((edge, i) => { + const pulsePos = getPulsePosition(edge.from, edge.to, pulsePhase + i * 0.25); + const pulseOpacity = interpolate( + Math.sin(pulsePhase * 3 + i), + [-1, 1], + [0.3, 0.9] + ); + return ( + + ); + })} + + {/* Outer Nodes - pop in sequentially */} + {NODES.map((node, i) => { + const np = nodeProgresses[i]; + const nodeScale = interpolate(np, [0, 1], [0, 1], { extrapolateRight: "clamp" }); + const nodeOpacity = interpolate(np, [0, 0.3], [0, 1], { extrapolateRight: "clamp" }); + + return ( + + {/* Node glow on appear */} + {np > 0 && np < 1 && ( + + )} + + + ); + })} + + {/* Center Node - appears first with pulse ring */} + + {/* Expanding ring on appear */} + {centerProgress > 0 && centerProgress < 1 && ( + + )} + + {/* Main center node */} + 0.5 ? "drop-shadow(0 0 6px #00F0FF)" : "none", + }} + /> + + {/* Rotating dashed ring */} + + + + + + + + + + + + + + + + + + + ); +}; diff --git a/promo/src/Root.tsx b/promo/src/Root.tsx new file mode 100644 index 0000000..c98d3a7 --- /dev/null +++ b/promo/src/Root.tsx @@ -0,0 +1,15 @@ +import { Composition } from "remotion"; +import { KiteDBPromo } from "./KiteDBPromo"; + +export const RemotionRoot: React.FC = () => { + return ( + + ); +}; diff --git a/promo/src/index.ts b/promo/src/index.ts new file mode 100644 index 0000000..f31c790 --- /dev/null +++ b/promo/src/index.ts @@ -0,0 +1,4 @@ +import { registerRoot } from "remotion"; +import { RemotionRoot } from "./Root"; + +registerRoot(RemotionRoot); diff --git a/promo/src/theme.ts b/promo/src/theme.ts new file mode 100644 index 0000000..8792681 --- /dev/null +++ b/promo/src/theme.ts @@ -0,0 +1,45 @@ +// KiteDB Brand Colors +export const theme = { + // Dark mode background colors + background: "#05070d", + foreground: "#f5f9ff", + card: "#0b1220", + muted: "#131d2d", + mutedForeground: "#9aa8ba", + border: 
"#1a2a42", + + // Neon accent colors + neon400: "#52c4ff", + neon500: "#2aa7ff", + neon600: "#0d8bf5", + electric: "#00d4ff", + accent: "#2af2ff", + accentStrong: "#38f7c9", + + // Code syntax colors + codeKeyword: "#ff79c6", + codeString: "#50fa7b", + codeNumber: "#bd93f9", + codeComment: "#6272a4", + codeFunction: "#00d4ff", + codeVariable: "#f8f8f2", + codeType: "#8be9fd", + + // Terminal colors + terminalRed: "#ff5f57", + terminalYellow: "#febc2e", + terminalGreen: "#28c840", + + // Fonts + fontMono: "'JetBrains Mono', 'SF Mono', Consolas, monospace", + fontSans: "'Space Grotesk', 'Inter', system-ui, sans-serif", +} as const; + +// Gradient definitions +export const gradients = { + neonText: "linear-gradient(120deg, #2af2ff 0%, #38f7c9 45%, #0d8bf5 100%)", + edgeGradient: "linear-gradient(180deg, #00F0FF 0%, #2563EB 100%)", + kiteFill: "linear-gradient(180deg, #22D3EE 0%, #1E40AF 100%)", + glowA: "rgba(42, 242, 255, 0.14)", + glowB: "rgba(56, 247, 201, 0.12)", +} as const; diff --git a/promo/tsconfig.json b/promo/tsconfig.json new file mode 100644 index 0000000..fe98f87 --- /dev/null +++ b/promo/tsconfig.json @@ -0,0 +1,18 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "bundler", + "jsx": "react-jsx", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "allowSyntheticDefaultImports": true, + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*"], + "exclude": ["node_modules"] +} diff --git a/ray-docs/src/lib/docs.ts b/ray-docs/src/lib/docs.ts index 6f29200..f67f94d 100644 --- a/ray-docs/src/lib/docs.ts +++ b/ray-docs/src/lib/docs.ts @@ -85,7 +85,7 @@ export const docsStructure: DocSection[] = [ }, { title: "Low-Level API", - description: "Direct storage access", + description: "Direct database primitives", slug: "api/low-level", }, { diff --git a/ray-docs/src/routes/docs/$.tsx 
b/ray-docs/src/routes/docs/$.tsx index 08ffcc4..ec68050 100644 --- a/ray-docs/src/routes/docs/$.tsx +++ b/ray-docs/src/routes/docs/$.tsx @@ -96,7 +96,7 @@ function DocPageContent(props: { slug: string }) { traversals
  • - Vector search – HNSW-indexed similarity queries + Vector search – IVF-based similarity queries
  • Embedded – Runs in your process, no server needed diff --git a/ray-docs/src/routes/docs/api/$.tsx b/ray-docs/src/routes/docs/api/$.tsx index 5b8f41e..986780e 100644 --- a/ray-docs/src/routes/docs/api/$.tsx +++ b/ray-docs/src/routes/docs/api/$.tsx @@ -117,7 +117,7 @@ db.countEdges(follows)`}

    Next Steps

    @@ -128,46 +128,64 @@ db.countEdges(follows)`} return (

    - The low-level API provides direct access to the underlying storage - engine for advanced use cases. + The low-level API uses the Database class for direct + graph operations, transaction control, and batched writes.

    -

    Storage Access

    +

    Open and Write

    Batch Operations

    +db.addEdgesBatch(edges); // Array<{ src, etype, dst }> +db.addEdgesWithPropsBatch(edgesWithProps); +db.commit(); + +// Optional maintenance checkpoint after ingest +db.checkpoint();`} language="typescript" /> -

    Iterators

    +

    Streaming and Pagination

    diff --git a/ray-docs/src/routes/docs/getting-started/$.tsx b/ray-docs/src/routes/docs/getting-started/$.tsx index a3dae51..61c6082 100644 --- a/ray-docs/src/routes/docs/getting-started/$.tsx +++ b/ray-docs/src/routes/docs/getting-started/$.tsx @@ -68,7 +68,7 @@ function DocPageContent(props: { slug: string }) { typescript={`import { kite } from '@kitedb/core'; // Define schema inline when opening the database -const db = kite('./social.kitedb', { +const db = await kite('./social.kitedb', { nodes: [ { name: 'user', @@ -151,7 +151,7 @@ let bob = db.insert("user") .returning()?; // Create a follow relationship -db.link(alice.id, "follows", bob.id, Some(json!({ +db.link(alice.id(), "follows", bob.id(), Some(json!({ "followedAt": std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH)? .as_secs() @@ -185,14 +185,14 @@ const followsBob = db.hasEdge(alice.id, 'follows', bob.id); console.log('Alice follows Bob:', followsBob);`} rust={`// Find all users Alice follows let following = db - .from(alice.id) + .from(alice.id()) .out(Some("follows")) .nodes()?; println!("Alice follows: {} users", following.len()); // Check if Alice follows Bob -let follows_bob = db.has_edge(alice.id, "follows", bob.id)?; +let follows_bob = db.has_edge(alice.id(), "follows", bob.id())?; println!("Alice follows Bob: {}", follows_bob);`} python={`# Find all users Alice follows following = (db diff --git a/ray-docs/src/routes/docs/getting-started/installation.tsx b/ray-docs/src/routes/docs/getting-started/installation.tsx index 2eef324..d10105f 100644 --- a/ray-docs/src/routes/docs/getting-started/installation.tsx +++ b/ray-docs/src/routes/docs/getting-started/installation.tsx @@ -20,8 +20,8 @@ function InstallationPage() {

    Requirements

      -
    • JavaScript/TypeScript: Bun 1.0+, Node.js 18+, or Deno
    • -
    • Rust: Rust 1.70+
    • +
    • JavaScript/TypeScript: Bun 1.0+ or Node.js 16+
    • +
    • Rust: Stable Rust toolchain
    • Python: Python 3.9+
    @@ -31,7 +31,7 @@ function InstallationPage() { typescript={`import { kite } from '@kitedb/core'; // Open database with a simple schema -const db = kite('./test.kitedb', { +const db = await kite('./test.kitedb', { nodes: [ { name: 'user', diff --git a/ray-docs/src/routes/docs/guides/$.tsx b/ray-docs/src/routes/docs/guides/$.tsx index ae9dd14..5714007 100644 --- a/ray-docs/src/routes/docs/guides/$.tsx +++ b/ray-docs/src/routes/docs/guides/$.tsx @@ -67,7 +67,7 @@ function DocPageContent(props: { slug: string }) { = db.all("user")?.collect(); // Count nodes -let user_count = db.count_nodes(Some("user"))?;`} +let user_count = db.count_nodes_by_type("user")?;`} python={`# Get by key user = db.get(user, "alice") -# Get by node ID -user_by_id = db.get_by_id(alice.id) +# Get lightweight ref by key +user_ref = db.get_ref(user, "alice") # Check if exists -exists = db.exists(alice.id) +exists = alice is not None and db.exists(alice) # List all nodes of a type -all_users = db.all(user) +all_users = list(db.all(user)) # Count nodes -user_count = db.count_nodes("user")`} +user_count = db.count(user)`} />

    Updating Data

    @@ -299,35 +301,33 @@ db.update(user, 'alice') .unset('email') .execute();`} rust={`// Update by node ID -db.update_by_id(alice.id) - .set("name", "Alice C.") +db.update_by_id(alice.id()) + .set("name", PropValue::String("Alice C.".into())) .execute()?; // Update multiple properties -db.update_by_id(alice.id) - .set_all(json!({ - "name": "Alice Chen", - "email": "newemail@example.com" - })) +db.update_by_id(alice.id()) + .set("name", PropValue::String("Alice Chen".into())) + .set("email", PropValue::String("newemail@example.com".into())) .execute()?; // Remove a property -db.update_by_id(alice.id) +db.update_by_id(alice.id()) .unset("email") .execute()?;`} - python={`# Update by node ID -(db.update_by_id(alice.id) - .set("name", "Alice C.") + python={`# Update by node reference +(db.update(alice) + .set(name="Alice C.") .execute()) # Update multiple properties -(db.update_by_id(alice.id) - .set_all({"name": "Alice Chen", "email": "newemail@example.com"}) +(db.update(alice) + .set({"name": "Alice Chen", "email": "newemail@example.com"}) .execute()) -# Remove a property -(db.update_by_id(alice.id) - .unset("email") +# Update another property +(db.update(alice) + .set(email="newemail@example.com") .execute())`} /> @@ -339,15 +339,19 @@ db.deleteById(alice.id); // Delete by key db.deleteByKey('user', 'alice');`} rust={`// Delete by node ID -db.delete_by_id(alice.id)?; +db.delete_node(alice.id())?; -// Delete by key -db.delete_by_key("user", "alice")?;`} - python={`# Delete by node ID -db.delete_by_id(alice.id) +// Delete by key (lookup then delete) +if let Some(node) = db.get("user", "alice")? { + db.delete_node(node.id())?; +}`} + python={`# Delete by node reference +db.delete(alice) -# Delete by key -db.delete_by_key(user, "alice")`} +# Delete by key (lookup then delete) +node = db.get(user, "alice") +if node is not None: + db.delete(node)`} />

    Next Steps

    @@ -388,19 +392,19 @@ const connections = db .nodes();`} rust={`// Find all users that Alice follows (outgoing edges) let following = db - .from(alice.id) + .from(alice.id()) .out(Some("follows")) .nodes()?; // Find all followers of Alice (incoming edges) let followers = db - .from(alice.id) + .from(alice.id()) .in_(Some("follows")) .nodes()?; // Follow edges in both directions let connections = db - .from(alice.id) + .from(alice.id()) .both(Some("knows")) .nodes()?;`} python={`# Find all users that Alice follows (outgoing edges) @@ -442,14 +446,14 @@ const authorsOfLikedArticles = db .nodes();`} rust={`// Find friends of friends (2-hop) let friends_of_friends = db - .from(alice.id) + .from(alice.id()) .out(Some("follows")) .out(Some("follows")) .nodes()?; // Chain different edge types let authors_of_liked = db - .from(alice.id) + .from(alice.id()) .out(Some("likes")) // Alice -> Articles .in_(Some("authored")) // Articles <- Users .nodes()?;`} @@ -486,7 +490,7 @@ const topConnections = db .nodes();`} rust={`// Traverse 1-3 hops let network = db - .from(alice.id) + .from(alice.id()) .traverse(Some("follows"), TraverseOptions { min_depth: Some(1), max_depth: 3, @@ -496,7 +500,7 @@ let network = db // Limit results let top_connections = db - .from(alice.id) + .from(alice.id()) .out(Some("follows")) .take(10) .nodes()?;`} @@ -574,7 +578,7 @@ index.set(doc.id, embedding);`} let embedding: Vec = get_embedding("Your document content")?; // Store the vector, associated with a node ID -index.set(doc.id, &embedding)?;`} +index.set(doc.id(), &embedding)?;`} python={`# Generate embedding with your preferred provider response = openai.embeddings.create( model="text-embedding-ada-002", @@ -641,13 +645,13 @@ index.buildIndex(); const stats = index.stats(); console.log(\`Total vectors: \${stats.totalVectors}\`);`} rust={`// Check if a node has a vector -let has_vector = index.has(doc.id)?; +let has_vector = index.has(doc.id())?; // Get a stored vector -let vector = 
index.get(doc.id)?; +let vector = index.get(doc.id())?; // Delete a vector -index.delete(doc.id)?; +index.delete(doc.id())?; // Build/rebuild the IVF index for faster search index.build_index()?; @@ -713,7 +717,7 @@ let mut db = Kite::open("./my.kitedb", options)?; db.transaction(|ctx| { let alice = ctx.create_node("user", "alice", HashMap::new())?; let bob = ctx.create_node("user", "bob", HashMap::new())?; - ctx.link(alice.id, "follows", bob.id)?; + ctx.link(alice.id(), "follows", bob.id())?; Ok(()) })?;`} python={`from kitedb import kite @@ -793,7 +797,7 @@ db.commit()`} Max throughput, single writer - begin_bulk() + batch APIs + beginBulk() + batch APIs Atomic ingest w/ MVCC @@ -801,7 +805,7 @@ db.commit()`} Multi-writer throughput - sync_mode=Normal + group commit + chunked batches + syncMode: 'Normal' + group commit + chunked batches @@ -955,7 +959,7 @@ if db.has_transaction(): Max ingest throughput, single writer - begin_bulk() + batch APIs + beginBulk() + batch APIs Atomic ingest with MVCC @@ -963,15 +967,15 @@ if db.has_transaction(): Multi-writer throughput - sync_mode=Normal + group commit (1-2ms) + syncMode: 'Normal' + group commit (1-2ms) Strong durability per commit - sync_mode=Full + syncMode: 'Full' Throwaway or test data - sync_mode=Off + syncMode: 'Off' @@ -1020,32 +1024,32 @@ db.commit()`} Single-writer ingest - sync_mode=Normal, group_commit=false, - WAL ≥ 256MB, auto_checkpoint=false + syncMode: 'Normal', groupCommitEnabled: false, + WAL ≥ 256MB, autoCheckpoint: false Multi-writer throughput - sync_mode=Normal, group_commit=true + syncMode: 'Normal', groupCommitEnabled: true (1-2ms window), chunked batches Max durability - sync_mode=Full, smaller batches + syncMode: 'Full', smaller batches Max speed (test) - sync_mode=Off + syncMode: 'Off'

    Checklist

      -
    • Use batch APIs: create_nodes_batch, add_edges_batch, add_edges_with_props_batch
    • -
    • Prefer begin_bulk() for ingest; commit in chunks
    • +
    • Use batch APIs: createNodesBatch, addEdgesBatch, addEdgesWithPropsBatch
    • +
    • Prefer beginBulk() for ingest; commit in chunks
    • Increase WAL size for large ingest (256MB+)
    • Disable auto-checkpoint during ingest; checkpoint once at the end
    • Use low-level API for hot paths in JS/TS
    • @@ -1101,15 +1105,17 @@ const results = await Promise.all([ // Workers can read concurrently from the same database file`} rust={`use std::sync::{Arc, RwLock}; use std::thread; +use kitedb::api::kite::Kite; -let db = Arc::new(RwLock::new(Kite::open("./data.kitedb")?)); +let db = Arc::new(RwLock::new(Kite::open("./data.kitedb", options)?)); let handles: Vec<_> = (0..4).map(|i| { let db = Arc::clone(&db); thread::spawn(move || { // Multiple threads can acquire read locks simultaneously + let key = format!("user{}", i); let guard = db.read().unwrap(); - guard.get_node(format!("user:{}", i)) + guard.get("user", &key).ok().flatten() }) }).collect(); @@ -1141,47 +1147,12 @@ for t in threads: print(results)`} /> -

      Performance Scaling

      +

      Performance Notes

      - Benchmarks show ~1.5-1.8x throughput improvement with 4-8 reader - threads: + Read throughput typically improves with parallel readers, while write + throughput is constrained by serialized commit ordering. Measure with + your workload and tune batch sizes and sync mode accordingly.

      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      ThreadsRelative ThroughputNotes
      11.0x (baseline)Single-threaded
      2~1.3xGood scaling
      4~1.5-1.6xSweet spot for most workloads
      8~1.6-1.8xDiminishing returns
      16~1.7-1.9xLock contention increases

      Best Practices

        @@ -1203,63 +1174,44 @@ print(results)`}
      -

      MVCC and Snapshot Isolation

      +

      MVCC and Transaction Semantics

      - KiteDB uses Multi-Version Concurrency Control (MVCC) to provide - snapshot isolation: + KiteDB uses Multi-Version Concurrency Control (MVCC) with serialized + writes:

        -
      • Readers never block writers
      • -
      • Writers never block readers
      • +
      • Multiple readers can run concurrently
      • - Each transaction sees a consistent snapshot from its start time + A write waits for in-flight reads, then blocks new reads while it + commits
      • -
      • Write conflicts are detected and one transaction is aborted
      • +
      • Each committed transaction is atomic
      • +
      • Write conflicts are detected at commit time
      { + const alice = ctx.get(user, 'alice'); + if (alice) { + ctx.update(user, 'alice') + .set('name', 'Alice Updated') + .execute(); + } +});`} + rust={`// Atomic transaction with TxContext +db.transaction(|ctx| { + let alice = ctx.get("user", "alice")?; + if let Some(node) = alice { + ctx.set_prop(node.id(), "name", PropValue::String("Alice Updated".into()))?; + } + Ok(()) +})?;`} + python={`# Atomic transaction (context manager handles commit/rollback) +with db.transaction(): + alice = db.get(user, "alice") + if alice is not None: + db.update(user, "alice").set(name="Alice Updated").execute()`} />

      Limitations

      diff --git a/ray-docs/src/routes/docs/internals/-performance.tsx b/ray-docs/src/routes/docs/internals/-performance.tsx index 4e583be..dbcf199 100644 --- a/ray-docs/src/routes/docs/internals/-performance.tsx +++ b/ray-docs/src/routes/docs/internals/-performance.tsx @@ -320,7 +320,7 @@ export function PerformancePage() {

      Latest snapshot (single-file raw, Rust core, 10k nodes / 50k edges, - edge types=3, edge props=10, sync_mode=Normal, group_commit=false, + edge types=3, edge props=10, syncMode=Normal, groupCommitEnabled=false, February 4, 2026):

      @@ -379,16 +379,16 @@ export function PerformancePage() {

      Write Durability vs Throughput

      • - Defaults stay safe: sync_mode=Full,{" "} - group_commit=false. + Defaults stay safe: syncMode=Full,{" "} + groupCommitEnabled=false.
      • Single-writer, low latency:{" "} - sync_mode=Normal + group_commit=false. + syncMode=Normal + groupCommitEnabled=false.
      • Multi-writer throughput:{" "} - sync_mode=Normal + group_commit=true (1-2ms). + syncMode=Normal + groupCommitEnabled=true (1-2ms). {" "} Scaling saturates quickly; prefer prep-parallel + single writer for max ingest. See{" "} @@ -397,7 +397,7 @@ export function PerformancePage() {
      • Highest speed, weakest durability:{" "} - sync_mode=Off (testing/throwaway only). + syncMode=Off (testing/throwaway only).

      @@ -410,8 +410,8 @@ export function PerformancePage() { Workload - sync_mode - group_commit + syncMode + groupCommitEnabled Why @@ -447,14 +447,14 @@ export function PerformancePage() {

      • Fastest ingest (single writer):{" "} - begin_bulk() + create_nodes_batch() +{" "} - add_edges_batch() / add_edges_with_props_batch(),{" "} - sync_mode=Normal, group_commit=false, WAL ≥ 256MB, + beginBulk() + createNodesBatch() +{" "} + addEdgesBatch() / addEdgesWithPropsBatch(),{" "} + syncMode=Normal, groupCommitEnabled=false, WAL ≥ 256MB, auto-checkpoint off during ingest, then checkpoint.
      • Multi-writer throughput:{" "} - sync_mode=Normal + group_commit=true (1-2ms window), + syncMode=Normal + groupCommitEnabled=true (1-2ms window), batched ops per transaction.
      • @@ -463,7 +463,7 @@ export function PerformancePage() {
      • Max speed, lowest durability:{" "} - sync_mode=Off for testing only. + syncMode=Off for testing only.

      @@ -473,10 +473,10 @@ export function PerformancePage() {

      Bulk Ingest Example (Low-Level)

      @@ -553,21 +553,19 @@ const stats = await db.stats();`} diff --git a/ray-docs/src/routes/docs/internals/-wal.tsx b/ray-docs/src/routes/docs/internals/-wal.tsx index d4af9b8..3f9b5e8 100644 --- a/ray-docs/src/routes/docs/internals/-wal.tsx +++ b/ray-docs/src/routes/docs/internals/-wal.tsx @@ -524,19 +524,19 @@ export function WALPage() {
      • - sync_mode = Normal + syncMode = Normal
      • - group_commit_enabled = true + groupCommitEnabled = true
      • - group_commit_window_ms = 2 + groupCommitWindowMs = 2
      • - begin_bulk() + batch APIs for ingest (MVCC disabled) + beginBulk() + batch APIs for ingest (MVCC disabled)
      • - Optional: increase wal_size (e.g., 64MB) for heavy ingest to + Optional: increase walSizeMb (e.g., 64MB) for heavy ingest to reduce checkpoints
      @@ -564,7 +564,7 @@ export function WALPage() { use resizeWal (offline) to grow it, or rebuild into a new file. To prevent single transactions from overfilling the active WAL region, split work into smaller commits (see bulkWrite or - chunked begin_bulk() sessions) and consider disabling + chunked beginBulk() sessions) and consider disabling background checkpoints during ingest.

      diff --git a/ray-docs/src/routes/index.tsx b/ray-docs/src/routes/index.tsx index 46f4904..a7c058b 100644 --- a/ray-docs/src/routes/index.tsx +++ b/ray-docs/src/routes/index.tsx @@ -107,7 +107,7 @@ function HomePage() { typescript: `import { kite } from '@kitedb/core'; // Open database with schema -const db = kite('./knowledge.kitedb', { +const db = await kite('./knowledge.kitedb', { nodes: [ { name: 'document', @@ -187,14 +187,14 @@ const results = db .nodes();`, rust: `// Find all topics discussed by Alice's documents let topics = db - .from(alice.id) + .from(alice.id()) .out(Some("wrote")) // Alice -> Document .out(Some("discusses")) // Document -> Topic .nodes()?; // Multi-hop traversal let results = db - .from(start_node.id) + .from(start_node.id()) .out(Some("knows")) .out(Some("worksAt")) .take(10) @@ -245,7 +245,7 @@ let mut index = VectorIndex::new(VectorIndexOptions { })?; // Add vectors for nodes -index.set(doc.id, &embedding)?; +index.set(doc.id(), &embedding)?; // Find similar documents let similar = index.search(&query_embedding, SimilarOptions { @@ -300,12 +300,12 @@ let doc = db.insert("document") .returning()?; // Create relationships -db.link(doc.id, "discusses", topic.id, Some(json!({ +db.link(doc.id(), "discusses", topic.id(), Some(json!({ "relevance": 0.95 })))?; // Update properties -db.update_by_id(doc.id) +db.update_by_id(doc.id()) .set("title", "Updated Title") .execute()?;`, python: `# Insert with returning @@ -317,8 +317,8 @@ doc = (db.insert(document) db.link(doc, discusses, topic, relevance=0.95) # Update properties -(db.update_by_id(doc.id) - .set("title", "Updated Title") +(db.update(doc) + .set(title="Updated Title") .execute())`, }; @@ -614,8 +614,8 @@ db.link(doc, discusses, topic, relevance=0.95) icon={
  • @@ -671,7 +671,7 @@ db.link(doc, discusses, topic, relevance=0.95) />
    @@ -838,10 +838,10 @@ db.link(doc, discusses, topic, relevance=0.95)

    - HNSW_INDEX + IVF_INDEX

    - O(log n) approximate nearest neighbor queries. + Approximate nearest-neighbor search with tunable probe count.

    diff --git a/ray-rs/Cargo.toml b/ray-rs/Cargo.toml index 3ca524f..376f65e 100644 --- a/ray-rs/Cargo.toml +++ b/ray-rs/Cargo.toml @@ -28,6 +28,13 @@ thiserror = "2.0" # Serialization serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +ureq = "2.10" +base64 = "0.22" +rustls-pemfile = "2.2" +webpki-roots = "1.0" +opentelemetry-proto = { version = "0.31", default-features = false, features = ["gen-tonic", "metrics"] } +prost = "0.14" +tonic = { version = "0.14", features = ["transport", "tls-webpki-roots", "gzip"] } # Binary encoding byteorder = "1.5" @@ -43,8 +50,8 @@ flate2 = "1.1" parking_lot = "0.12" crossbeam-channel = "0.5" -# Async runtime (optional) -tokio = { version = "1.45", features = ["rt", "fs", "sync"], optional = true } +# Async runtime +tokio = { version = "1.45", features = ["rt", "rt-multi-thread", "fs", "sync", "time", "net"] } # Collections hashbrown = "0.15" @@ -81,6 +88,7 @@ napi-build = { version = "2", optional = true } [dev-dependencies] tempfile = "3.20" criterion = "0.5" +neo4rs = "0.8" [[bench]] name = "distance" @@ -106,7 +114,7 @@ harness = false default = ["napi"] napi = ["dep:napi", "dep:napi-derive", "dep:napi-build"] python = ["dep:pyo3", "dep:numpy"] -async = ["tokio"] +async = [] bench-profile = [] [profile.release] diff --git a/ray-rs/README.md b/ray-rs/README.md index f3696f2..ee62afa 100644 --- a/ray-rs/README.md +++ b/ray-rs/README.md @@ -181,6 +181,186 @@ const [aliceFriends, bobFriends] = await Promise.all([ This is implemented using a read-write lock (RwLock) internally, providing good read scalability while maintaining data consistency. +## Replication Admin (low-level API) + +Phase D replication controls are available on the low-level `Database` API. 
+ +```ts +import { Database } from 'kitedb' +import { + collectReplicationLogTransportJson, + collectReplicationMetricsOtelJson, + collectReplicationMetricsOtelProtobuf, + collectReplicationMetricsPrometheus, + collectReplicationSnapshotTransportJson, + createReplicationTransportAdapter, + pushReplicationMetricsOtelGrpc, + pushReplicationMetricsOtelGrpcWithOptions, + pushReplicationMetricsOtelJson, + pushReplicationMetricsOtelJsonWithOptions, + pushReplicationMetricsOtelProtobuf, + pushReplicationMetricsOtelProtobufWithOptions, +} from 'kitedb/native' + +const primary = Database.open('cluster-primary.kitedb', { + replicationRole: 'Primary', + replicationSidecarPath: './cluster-primary.sidecar', + replicationSegmentMaxBytes: 64 * 1024 * 1024, + replicationRetentionMinEntries: 1024, +}) + +primary.begin() +primary.createNode('n:1') +const token = primary.commitWithToken() + +primary.primaryReportReplicaProgress('replica-a', 1, 42) +const retention = primary.primaryRunRetention() +const primaryStatus = primary.primaryReplicationStatus() + +const replica = Database.open('cluster-replica.kitedb', { + replicationRole: 'Replica', + replicationSidecarPath: './cluster-replica.sidecar', + replicationSourceDbPath: 'cluster-primary.kitedb', + replicationSourceSidecarPath: './cluster-primary.sidecar', +}) + +replica.replicaBootstrapFromSnapshot() +replica.replicaCatchUpOnce(256) +if (token) replica.waitForToken(token, 2_000) +const replicaStatus = replica.replicaReplicationStatus() +if (replicaStatus?.needsReseed) replica.replicaReseedFromSnapshot() + +const prometheus = collectReplicationMetricsPrometheus(primary) +console.log(prometheus) + +const otelJson = collectReplicationMetricsOtelJson(primary) +console.log(otelJson) + +const otelProtobuf = collectReplicationMetricsOtelProtobuf(primary) +console.log(otelProtobuf.length) + +const exportResult = pushReplicationMetricsOtelJson( + primary, + 'http://127.0.0.1:4318/v1/metrics', + 5_000, +) 
+console.log(exportResult.statusCode, exportResult.responseBody) + +const protoExport = pushReplicationMetricsOtelProtobuf( + primary, + 'http://127.0.0.1:4318/v1/metrics', + 5_000, +) +console.log(protoExport.statusCode, protoExport.responseBody) + +const grpcExport = pushReplicationMetricsOtelGrpc( + primary, + 'http://127.0.0.1:4317', + 5_000, +) +console.log(grpcExport.statusCode, grpcExport.responseBody) + +const secureExport = pushReplicationMetricsOtelJsonWithOptions( + primary, + 'https://collector.internal:4318/v1/metrics', + { + timeoutMs: 5_000, + retryMaxAttempts: 3, + retryBackoffMs: 200, + retryBackoffMaxMs: 2_000, + retryJitterRatio: 0.2, + adaptiveRetry: true, + adaptiveRetryMode: 'ewma', + adaptiveRetryEwmaAlpha: 0.35, + circuitBreakerFailureThreshold: 3, + circuitBreakerOpenMs: 30_000, + circuitBreakerHalfOpenProbes: 2, + circuitBreakerStateUrl: 'https://state-store.internal/otlp/breakers', + circuitBreakerStatePatch: true, + circuitBreakerStatePatchBatch: true, + circuitBreakerStatePatchBatchMaxKeys: 4, + circuitBreakerStatePatchMerge: true, + circuitBreakerStatePatchMergeMaxKeys: 16, + circuitBreakerStatePatchRetryMaxAttempts: 2, + circuitBreakerStateCas: true, + circuitBreakerStateLeaseId: 'otlp-writer-a', + circuitBreakerScopeKey: 'collector-a', + compressionGzip: true, + httpsOnly: true, + caCertPemPath: './tls/collector-ca.pem', + clientCertPemPath: './tls/client.pem', + clientKeyPemPath: './tls/client-key.pem', + }, +) +console.log(secureExport.statusCode, secureExport.responseBody) + +const secureProtoExport = pushReplicationMetricsOtelProtobufWithOptions( + primary, + 'https://collector.internal:4318/v1/metrics', + { + timeoutMs: 5_000, + retryMaxAttempts: 3, + retryBackoffMs: 200, + retryBackoffMaxMs: 2_000, + retryJitterRatio: 0.2, + adaptiveRetry: true, + adaptiveRetryMode: 'ewma', + adaptiveRetryEwmaAlpha: 0.35, + circuitBreakerFailureThreshold: 3, + circuitBreakerOpenMs: 30_000, + circuitBreakerHalfOpenProbes: 2, + 
circuitBreakerStatePath: './runtime/otlp-breakers.json', + circuitBreakerScopeKey: 'collector-a', + compressionGzip: true, + httpsOnly: true, + caCertPemPath: './tls/collector-ca.pem', + clientCertPemPath: './tls/client.pem', + clientKeyPemPath: './tls/client-key.pem', + }, +) +console.log(secureProtoExport.statusCode, secureProtoExport.responseBody) + +const secureGrpcExport = pushReplicationMetricsOtelGrpcWithOptions( + primary, + 'https://collector.internal:4317', + { + timeoutMs: 5_000, + retryMaxAttempts: 3, + retryBackoffMs: 200, + retryBackoffMaxMs: 2_000, + retryJitterRatio: 0.2, + adaptiveRetry: true, + adaptiveRetryMode: 'ewma', + adaptiveRetryEwmaAlpha: 0.35, + circuitBreakerFailureThreshold: 3, + circuitBreakerOpenMs: 30_000, + circuitBreakerHalfOpenProbes: 2, + circuitBreakerStatePath: './runtime/otlp-breakers.json', + circuitBreakerScopeKey: 'collector-a', + compressionGzip: true, + httpsOnly: true, + caCertPemPath: './tls/collector-ca.pem', + clientCertPemPath: './tls/client.pem', + clientKeyPemPath: './tls/client-key.pem', + }, +) +console.log(secureGrpcExport.statusCode, secureGrpcExport.responseBody) + +const snapshotJson = collectReplicationSnapshotTransportJson(primary, false) +console.log(snapshotJson) + +const logPageJson = collectReplicationLogTransportJson(primary, null, 128, 1_048_576, false) +console.log(logPageJson) + +const adapter = createReplicationTransportAdapter(primary) +const snapshot = adapter.snapshot(false) +const logPage = adapter.log({ maxFrames: 128, maxBytes: 1_048_576, includePayload: false }) +console.log(snapshot, logPage) + +replica.close() +primary.close() +``` + ## API surface The Node bindings expose both low-level graph primitives (`Database`) and higher-level APIs (Kite) for schema-driven workflows, plus metrics, backups, traversal, and vector search. 
For full API details and guides, see the docs: diff --git a/ray-rs/__test__/replication_transport_auth.spec.ts b/ray-rs/__test__/replication_transport_auth.spec.ts new file mode 100644 index 0000000..167d846 --- /dev/null +++ b/ray-rs/__test__/replication_transport_auth.spec.ts @@ -0,0 +1,222 @@ +import test from 'ava' + +import { + authorizeReplicationAdminRequest, + createForwardedTlsMtlsMatcher, + createNodeTlsMtlsMatcher, + createReplicationAdminAuthorizer, + isForwardedTlsClientAuthorized, + isReplicationAdminAuthorized, + isNodeTlsClientAuthorized, + type ReplicationAdminAuthRequest, +} from '../ts/replication_transport' + +type RequestLike = ReplicationAdminAuthRequest & { + tlsAuthorized?: boolean +} + +function request(headers: Record = {}): RequestLike { + return { headers } +} + +test('replication admin auth none mode always allows', (t) => { + t.true(isReplicationAdminAuthorized(request(), { mode: 'none' })) + t.notThrows(() => authorizeReplicationAdminRequest(request(), { mode: 'none' })) +}) + +test('replication admin auth token mode requires bearer token', (t) => { + const cfg = { mode: 'token', token: 'abc123' } as const + t.true(isReplicationAdminAuthorized(request({ authorization: 'Bearer abc123' }), cfg)) + t.false(isReplicationAdminAuthorized(request({ authorization: 'Bearer no' }), cfg)) + t.false(isReplicationAdminAuthorized(request({}), cfg)) +}) + +test('replication admin auth mtls mode supports header + subject regex', (t) => { + const cfg = { + mode: 'mtls', + mtlsHeader: 'x-client-cert', + mtlsSubjectRegex: /^CN=replication-admin,/, + } as const + t.true(isReplicationAdminAuthorized(request({ 'x-client-cert': 'CN=replication-admin,O=RayDB' }), cfg)) + t.false(isReplicationAdminAuthorized(request({ 'x-client-cert': 'CN=viewer,O=RayDB' }), cfg)) +}) + +test('replication admin auth token_or_mtls accepts either', (t) => { + const cfg = { + mode: 'token_or_mtls', + token: 'abc123', + mtlsHeader: 'x-client-cert', + } as const + 
t.true(isReplicationAdminAuthorized(request({ authorization: 'Bearer abc123' }), cfg)) + t.true(isReplicationAdminAuthorized(request({ 'x-client-cert': 'CN=replication-admin,O=RayDB' }), cfg)) + t.false(isReplicationAdminAuthorized(request({}), cfg)) +}) + +test('replication admin auth token_and_mtls requires both', (t) => { + const cfg = { + mode: 'token_and_mtls', + token: 'abc123', + mtlsHeader: 'x-client-cert', + } as const + t.false(isReplicationAdminAuthorized(request({ authorization: 'Bearer abc123' }), cfg)) + t.false(isReplicationAdminAuthorized(request({ 'x-client-cert': 'CN=replication-admin,O=RayDB' }), cfg)) + t.true( + isReplicationAdminAuthorized( + request({ + authorization: 'Bearer abc123', + 'x-client-cert': 'CN=replication-admin,O=RayDB', + }), + cfg, + ), + ) +}) + +test('replication admin auth supports custom mtls matcher hook', (t) => { + const cfg = { + mode: 'mtls', + mtlsMatcher: (req: RequestLike) => req.tlsAuthorized === true, + } + t.true(isReplicationAdminAuthorized({ headers: {}, tlsAuthorized: true }, cfg)) + t.false(isReplicationAdminAuthorized({ headers: {}, tlsAuthorized: false }, cfg)) +}) + +test('node tls matcher detects authorized socket on common request shapes', (t) => { + t.true(isNodeTlsClientAuthorized({ headers: {}, socket: { authorized: true } })) + t.true(isNodeTlsClientAuthorized({ headers: {}, client: { authorized: true } })) + t.true(isNodeTlsClientAuthorized({ headers: {}, raw: { socket: { authorized: true } } })) + t.true(isNodeTlsClientAuthorized({ headers: {}, req: { socket: { authorized: true } } })) + t.false(isNodeTlsClientAuthorized({ headers: {}, socket: { authorized: false } })) + t.false(isNodeTlsClientAuthorized({ headers: {} })) +}) + +test('node tls matcher supports peer certificate requirement', (t) => { + const withPeer = { + headers: {}, + socket: { + authorized: true, + getPeerCertificate: () => ({ subject: { CN: 'replication-admin' } }), + }, + } + const withoutPeer = { + headers: {}, + socket: { + 
authorized: true, + getPeerCertificate: () => ({}), + }, + } + t.true(isNodeTlsClientAuthorized(withPeer, { requirePeerCertificate: true })) + t.false(isNodeTlsClientAuthorized(withoutPeer, { requirePeerCertificate: true })) +}) + +test('node tls matcher factory composes into auth config', (t) => { + const requireAdmin = createReplicationAdminAuthorizer({ + mode: 'mtls', + mtlsMatcher: createNodeTlsMtlsMatcher({ requirePeerCertificate: true }), + }) + t.notThrows(() => + requireAdmin({ + headers: {}, + socket: { + authorized: true, + getPeerCertificate: () => ({ subject: { CN: 'replication-admin' } }), + }, + }), + ) + const error = t.throws(() => + requireAdmin({ + headers: {}, + socket: { + authorized: true, + getPeerCertificate: () => ({}), + }, + }), + ) + t.truthy(error) +}) + +test('forwarded tls matcher validates proxy verify headers', (t) => { + t.true( + isForwardedTlsClientAuthorized({ + headers: { 'x-client-verify': 'SUCCESS' }, + }), + ) + t.false( + isForwardedTlsClientAuthorized({ + headers: { 'x-client-verify': 'FAILED' }, + }), + ) + t.false(isForwardedTlsClientAuthorized({ headers: {} })) +}) + +test('forwarded tls matcher supports peer certificate and custom verify policy', (t) => { + t.true( + isForwardedTlsClientAuthorized( + { + headers: { + 'x-client-verify': 'SUCCESS', + 'x-forwarded-client-cert': 'CN=replication-admin,O=RayDB', + }, + }, + { requirePeerCertificate: true }, + ), + ) + t.false( + isForwardedTlsClientAuthorized( + { + headers: { 'x-client-verify': 'SUCCESS' }, + }, + { requirePeerCertificate: true }, + ), + ) + t.true( + isForwardedTlsClientAuthorized( + { + headers: { 'x-forwarded-client-cert': 'CN=replication-admin,O=RayDB' }, + }, + { requireVerifyHeader: false, requirePeerCertificate: true }, + ), + ) +}) + +test('forwarded tls matcher factory composes into auth config', (t) => { + const requireAdmin = createReplicationAdminAuthorizer({ + mode: 'mtls', + mtlsMatcher: createForwardedTlsMtlsMatcher({ requirePeerCertificate: 
true }), + }) + t.notThrows(() => + requireAdmin({ + headers: { + 'x-client-verify': 'SUCCESS', + 'x-forwarded-client-cert': 'CN=replication-admin,O=RayDB', + }, + }), + ) + const error = t.throws(() => + requireAdmin({ + headers: { + 'x-client-verify': 'FAILED', + 'x-forwarded-client-cert': 'CN=replication-admin,O=RayDB', + }, + }), + ) + t.truthy(error) +}) + +test('replication admin auth helper throws unauthorized and invalid config', (t) => { + const requireAdmin = createReplicationAdminAuthorizer({ + mode: 'token', + token: 'abc123', + }) + const error = t.throws(() => requireAdmin(request({ authorization: 'Bearer wrong' }))) + t.truthy(error) + t.true(String(error?.message).includes('not satisfied')) + + const invalid = t.throws(() => + createReplicationAdminAuthorizer({ + mode: 'token', + token: ' ', + }), + ) + t.truthy(invalid) + t.true(String(invalid?.message).includes('requires a non-empty token')) +}) diff --git a/ray-rs/__test__/replication_transport_flow.spec.ts b/ray-rs/__test__/replication_transport_flow.spec.ts new file mode 100644 index 0000000..9d0c9c0 --- /dev/null +++ b/ray-rs/__test__/replication_transport_flow.spec.ts @@ -0,0 +1,160 @@ +import test from 'ava' + +import fs from 'node:fs' +import os from 'node:os' +import path from 'node:path' + +import { + Database, + collectReplicationLogTransportJson, + collectReplicationMetricsPrometheus, + collectReplicationSnapshotTransportJson, +} from '../index' +import { + createReplicationAdminAuthorizer, + createReplicationTransportAdapter, + type ReplicationAdminAuthRequest, +} from '../ts/replication_transport' + +function makePaths() { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'kitedb-repl-flow-')) + return { + primaryPath: path.join(dir, 'primary.kitedb'), + primarySidecar: path.join(dir, 'primary.sidecar'), + replicaPath: path.join(dir, 'replica.kitedb'), + replicaSidecar: path.join(dir, 'replica.sidecar'), + } +} + +function drainReplica(replica: Database, maxFrames: number, maxLoops 
= 64): void { + for (let i = 0; i < maxLoops; i += 1) { + const applied = replica.replicaCatchUpOnce(maxFrames) + if (applied === 0) return + } +} + +test('host-runtime replication transport/admin flow is consistent', (t) => { + const paths = makePaths() + const primary = Database.open(paths.primaryPath, { + replicationRole: 'Primary', + replicationSidecarPath: paths.primarySidecar, + replicationSegmentMaxBytes: 1, + replicationRetentionMinEntries: 1, + autoCheckpoint: false, + }) + const stale = Database.open(paths.primaryPath, { + replicationRole: 'Primary', + replicationSidecarPath: paths.primarySidecar, + replicationSegmentMaxBytes: 1, + replicationRetentionMinEntries: 1, + autoCheckpoint: false, + }) + const replica = Database.open(paths.replicaPath, { + replicationRole: 'Replica', + replicationSidecarPath: paths.replicaSidecar, + replicationSourceDbPath: paths.primaryPath, + replicationSourceSidecarPath: paths.primarySidecar, + autoCheckpoint: false, + }) + + t.teardown(() => { + for (const db of [replica, stale, primary]) { + try { + db.close() + } catch {} + } + }) + + primary.begin() + primary.createNode('n:base') + const tokenBase = primary.commitWithToken() + t.true(tokenBase.startsWith('1:')) + + replica.replicaBootstrapFromSnapshot() + const replicaAfterBootstrap = replica.replicaReplicationStatus() + t.false(replicaAfterBootstrap.needsReseed) + t.is(replicaAfterBootstrap.appliedLogIndex, 1) + + const adapter = createReplicationTransportAdapter(primary) + const snapshot = adapter.snapshot(false) + const snapshotDirect = JSON.parse(collectReplicationSnapshotTransportJson(primary, false)) + t.is(snapshot.epoch, snapshotDirect.epoch) + t.is(snapshot.head_log_index, snapshotDirect.head_log_index) + t.truthy(snapshot.start_cursor) + + const logPage = adapter.log({ + cursor: null, + maxFrames: 128, + maxBytes: 1024 * 1024, + includePayload: false, + }) + const logPageDirect = JSON.parse( + collectReplicationLogTransportJson(primary, null, 128, 1024 * 1024, 
false), + ) + t.is(logPage.frame_count, logPageDirect.frame_count) + t.true(logPage.frame_count >= 1) + + const metricsProm = adapter.metricsPrometheus() + const metricsPromDirect = collectReplicationMetricsPrometheus(primary) + t.true(metricsProm.includes('kitedb_replication_')) + t.is(metricsProm, metricsPromDirect) + + const requireAdmin = createReplicationAdminAuthorizer({ + mode: 'token', + token: 'secret-token', + }) + t.notThrows(() => + requireAdmin({ headers: { authorization: 'Bearer secret-token' } }), + ) + const authErr = t.throws(() => + requireAdmin({ headers: { authorization: 'Bearer wrong-token' } }), + ) + t.truthy(authErr) + + for (let i = 0; i < 6; i += 1) { + primary.begin() + primary.createNode(`n:lag-${i}`) + primary.commitWithToken() + } + + const lagStatus = replica.replicaReplicationStatus() + primary.primaryReportReplicaProgress( + 'replica-a', + lagStatus.appliedEpoch, + lagStatus.appliedLogIndex, + ) + primary.primaryRunRetention() + + const reseedErr = t.throws(() => replica.replicaCatchUpOnce(64)) + t.truthy(reseedErr) + t.regex(String(reseedErr?.message), /reseed/i) + t.true(replica.replicaReplicationStatus().needsReseed) + + primary.checkpoint() + replica.replicaReseedFromSnapshot() + t.false(replica.replicaReplicationStatus().needsReseed) + t.is(replica.countNodes(), primary.countNodes()) + + const beforePromote = primary.primaryReplicationStatus().epoch + const promotedEpoch = primary.primaryPromoteToNextEpoch() + t.true(promotedEpoch > beforePromote) + + stale.begin() + stale.createNode('n:stale-write') + const staleErr = t.throws(() => stale.commitWithToken()) + t.truthy(staleErr) + t.regex(String(staleErr?.message), /stale primary/i) + if (stale.hasTransaction()) { + stale.rollback() + } + + primary.begin() + primary.createNode('n:post-promote') + const promotedToken = primary.commitWithToken() + t.true(promotedToken.startsWith(`${promotedEpoch}:`)) + + t.false(replica.waitForToken(promotedToken, 5)) + drainReplica(replica, 128) 
+ t.true(replica.waitForToken(promotedToken, 2000)) + t.is(replica.countNodes(), primary.countNodes()) +}) diff --git a/ray-rs/benches/single_file.rs b/ray-rs/benches/single_file.rs index 0dac393..e86a566 100644 --- a/ray-rs/benches/single_file.rs +++ b/ray-rs/benches/single_file.rs @@ -5,12 +5,16 @@ use criterion::{ black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput, }; +use std::fs; +use std::path::{Path, PathBuf}; +use std::time::{Duration, Instant}; use tempfile::tempdir; extern crate kitedb; use kitedb::core::single_file::{ - close_single_file, open_single_file, SingleFileOpenOptions, SyncMode, + close_single_file, close_single_file_with_options, open_single_file, SingleFileCloseOptions, + SingleFileOpenOptions, SyncMode, }; use kitedb::types::PropValue; @@ -26,6 +30,152 @@ fn open_bench_db(path: &std::path::Path) -> kitedb::core::single_file::SingleFil .expect("expected value") } +struct OpenCloseFixture { + name: &'static str, + path: PathBuf, + wal_size: usize, +} + +fn seed_graph_fixture( + path: &Path, + node_count: usize, + edge_count: usize, + dirty_wal_tail: usize, + vector_count: usize, + vector_dims: usize, + wal_size: usize, +) { + let db = open_single_file( + path, + SingleFileOpenOptions::new() + .sync_mode(SyncMode::Normal) + .wal_size(wal_size) + .auto_checkpoint(false), + ) + .expect("expected value"); + + if node_count > 0 { + const NODE_BATCH_SIZE: usize = 2_000; + let mut node_ids = Vec::with_capacity(node_count); + + for start in (0..node_count).step_by(NODE_BATCH_SIZE) { + let end = (start + NODE_BATCH_SIZE).min(node_count); + db.begin(false).expect("expected value"); + for i in start..end { + let node_id = db + .create_node(Some(&format!("bench:n{i}"))) + .expect("expected value"); + node_ids.push(node_id); + } + db.commit().expect("expected value"); + } + + if edge_count > 0 { + const EDGE_BATCH_SIZE: usize = 4_000; + db.begin(false).expect("expected value"); + let etype = 
db.define_etype("bench:connects").expect("expected value"); + db.commit().expect("expected value"); + + for start in (0..edge_count).step_by(EDGE_BATCH_SIZE) { + let end = (start + EDGE_BATCH_SIZE).min(edge_count); + let mut edges = Vec::with_capacity(end - start); + for i in start..end { + let src_idx = i % node_count; + let hop = (i / node_count) + 1; + let mut dst_idx = (src_idx + hop) % node_count; + if dst_idx == src_idx { + dst_idx = (dst_idx + 1) % node_count; + } + edges.push((node_ids[src_idx], etype, node_ids[dst_idx])); + } + db.begin(false).expect("expected value"); + db.add_edges_batch(&edges).expect("expected value"); + db.commit().expect("expected value"); + } + } + + if vector_count > 0 && vector_dims > 0 { + const VECTOR_BATCH_SIZE: usize = 1_000; + let vector_count = vector_count.min(node_ids.len()); + + // Keep fixture generation stable for small WAL sizes by compacting + // node/edge setup before vector batches. + db.checkpoint().expect("expected value"); + + db.begin(false).expect("expected value"); + let vector_prop = db + .define_propkey("bench:embedding") + .expect("expected value"); + db.commit().expect("expected value"); + + for start in (0..vector_count).step_by(VECTOR_BATCH_SIZE) { + let end = (start + VECTOR_BATCH_SIZE).min(vector_count); + db.begin(false).expect("expected value"); + for i in start..end { + let mut vector = vec![0.0f32; vector_dims]; + for (dim, value) in vector.iter_mut().enumerate() { + *value = (((i + dim + 1) % 97) as f32) / 97.0; + } + db.set_node_vector(node_ids[i], vector_prop, &vector) + .expect("expected value"); + } + db.commit().expect("expected value"); + } + } + + db.checkpoint().expect("expected value"); + + if dirty_wal_tail > 0 { + for start in (0..dirty_wal_tail).step_by(NODE_BATCH_SIZE) { + let end = (start + NODE_BATCH_SIZE).min(dirty_wal_tail); + db.begin(false).expect("expected value"); + for i in start..end { + let _ = db + .create_node(Some(&format!("bench:tail{i}"))) + .expect("expected value"); 
+ } + db.commit().expect("expected value"); + } + } + } + + close_single_file(db).expect("expected value"); +} + +fn build_open_close_fixture( + temp_dir: &tempfile::TempDir, + name: &'static str, + node_count: usize, + edge_count: usize, + dirty_wal_tail: usize, + vector_count: usize, + vector_dims: usize, + wal_size: usize, +) -> OpenCloseFixture { + let path = temp_dir.path().join(format!("open-close-{name}.kitedb")); + seed_graph_fixture( + &path, + node_count, + edge_count, + dirty_wal_tail, + vector_count, + vector_dims, + wal_size, + ); + + let size = fs::metadata(&path).expect("expected value").len(); + println!( + "prepared fixture {name}: nodes={node_count}, edges={edge_count}, vectors={vector_count}, vector_dims={vector_dims}, wal_size={} bytes, file_size={} bytes", + wal_size, size + ); + + OpenCloseFixture { + name, + path, + wal_size, + } +} + fn bench_single_file_insert(c: &mut Criterion) { let mut group = c.benchmark_group("single_file_insert"); group.sample_size(10); @@ -96,9 +246,248 @@ fn bench_single_file_checkpoint(c: &mut Criterion) { group.finish(); } +fn bench_single_file_open_close(c: &mut Criterion) { + let mut group = c.benchmark_group("single_file_open_close"); + group.sample_size(30); + + let temp_dir = tempdir().expect("expected value"); + let fixtures = vec![ + build_open_close_fixture(&temp_dir, "empty", 0, 0, 0, 0, 0, 4 * 1024 * 1024), + build_open_close_fixture( + &temp_dir, + "graph_1k_2k", + 1_000, + 2_000, + 0, + 0, + 0, + 4 * 1024 * 1024, + ), + build_open_close_fixture( + &temp_dir, + "graph_10k_20k", + 10_000, + 20_000, + 0, + 0, + 0, + 4 * 1024 * 1024, + ), + build_open_close_fixture( + &temp_dir, + "graph_10k_20k_vec5k", + 10_000, + 20_000, + 0, + 5_000, + 128, + 4 * 1024 * 1024, + ), + ]; + + for fixture in &fixtures { + for (mode_name, read_only) in [("rw", false), ("ro", true)] { + group.bench_with_input( + BenchmarkId::new(format!("open_only/{mode_name}"), fixture.name), + fixture, + |bencher, fixture| { + 
bencher.iter_custom(|iters| { + let mut total = Duration::ZERO; + for _ in 0..iters { + let start = Instant::now(); + let db = open_single_file( + &fixture.path, + SingleFileOpenOptions::new() + .sync_mode(SyncMode::Normal) + .wal_size(fixture.wal_size) + .create_if_missing(false) + .read_only(read_only), + ) + .expect("expected value"); + total += start.elapsed(); + close_single_file(db).expect("expected value"); + } + total + }); + }, + ); + + group.bench_with_input( + BenchmarkId::new(format!("close_only/{mode_name}"), fixture.name), + fixture, + |bencher, fixture| { + bencher.iter_custom(|iters| { + let mut total = Duration::ZERO; + for _ in 0..iters { + let db = open_single_file( + &fixture.path, + SingleFileOpenOptions::new() + .sync_mode(SyncMode::Normal) + .wal_size(fixture.wal_size) + .create_if_missing(false) + .read_only(read_only), + ) + .expect("expected value"); + let start = Instant::now(); + close_single_file(db).expect("expected value"); + total += start.elapsed(); + } + total + }); + }, + ); + + group.bench_with_input( + BenchmarkId::new(format!("open_close/{mode_name}"), fixture.name), + fixture, + |bencher, fixture| { + bencher.iter_custom(|iters| { + let mut total = Duration::ZERO; + for _ in 0..iters { + let start = Instant::now(); + let db = open_single_file( + &fixture.path, + SingleFileOpenOptions::new() + .sync_mode(SyncMode::Normal) + .wal_size(fixture.wal_size) + .create_if_missing(false) + .read_only(read_only), + ) + .expect("expected value"); + close_single_file(db).expect("expected value"); + total += start.elapsed(); + } + total + }); + }, + ); + } + } + + group.finish(); +} + +fn bench_single_file_open_close_limits(c: &mut Criterion) { + let mut group = c.benchmark_group("single_file_open_close_limits"); + group.sample_size(10); + group.measurement_time(Duration::from_secs(4)); + + let temp_dir = tempdir().expect("expected value"); + let fixtures = vec![ + build_open_close_fixture( + &temp_dir, + "graph_10k_20k_dirty_wal", + 
10_000, + 20_000, + 2_000, + 0, + 0, + 64 * 1024 * 1024, + ), + build_open_close_fixture( + &temp_dir, + "graph_100k_200k", + 100_000, + 200_000, + 0, + 0, + 0, + 64 * 1024 * 1024, + ), + build_open_close_fixture( + &temp_dir, + "graph_100k_200k_vec20k", + 100_000, + 200_000, + 0, + 20_000, + 128, + 64 * 1024 * 1024, + ), + build_open_close_fixture( + &temp_dir, + "graph_100k_200k_dirty_wal", + 100_000, + 200_000, + 20_000, + 0, + 0, + 64 * 1024 * 1024, + ), + ]; + + for fixture in &fixtures { + for (mode_name, read_only) in [("rw", false), ("ro", true)] { + group.bench_with_input( + BenchmarkId::new(format!("open_close/{mode_name}"), fixture.name), + fixture, + |bencher, fixture| { + bencher.iter_custom(|iters| { + let mut total = Duration::ZERO; + for _ in 0..iters { + let start = Instant::now(); + let db = open_single_file( + &fixture.path, + SingleFileOpenOptions::new() + .sync_mode(SyncMode::Normal) + .wal_size(fixture.wal_size) + .create_if_missing(false) + .read_only(read_only), + ) + .expect("expected value"); + close_single_file(db).expect("expected value"); + total += start.elapsed(); + } + total + }); + }, + ); + + if fixture.name.contains("dirty_wal") { + group.bench_with_input( + BenchmarkId::new(format!("open_close_ckpt01/{mode_name}"), fixture.name), + fixture, + |bencher, fixture| { + bencher.iter_custom(|iters| { + let bench_tmp = tempdir().expect("expected value"); + let bench_path = bench_tmp.path().join("bench-copy.kitedb"); + fs::copy(&fixture.path, &bench_path).expect("expected value"); + + let mut total = Duration::ZERO; + for _ in 0..iters { + let start = Instant::now(); + let db = open_single_file( + &bench_path, + SingleFileOpenOptions::new() + .sync_mode(SyncMode::Normal) + .wal_size(fixture.wal_size) + .create_if_missing(false) + .read_only(read_only), + ) + .expect("expected value"); + close_single_file_with_options( + db, + SingleFileCloseOptions::new().checkpoint_if_wal_usage_at_least(0.01), + ) + .expect("expected value"); + total += 
start.elapsed(); + } + total + }); + }, + ); + } + } + } + + group.finish(); +} + criterion_group!( benches, bench_single_file_insert, - bench_single_file_checkpoint + bench_single_file_checkpoint, + bench_single_file_open_close, + bench_single_file_open_close_limits ); criterion_main!(benches); diff --git a/ray-rs/examples/index_pipeline_hypothesis_bench.rs b/ray-rs/examples/index_pipeline_hypothesis_bench.rs new file mode 100644 index 0000000..17ae3cb --- /dev/null +++ b/ray-rs/examples/index_pipeline_hypothesis_bench.rs @@ -0,0 +1,987 @@ +//! Index pipeline hypothesis benchmark for code intelligence workloads. +//! +//! Tests two modes: +//! 1) Sequential: tree-sitter parse -> TS graph write -> SCIP parse -> SCIP graph write -> +//! embed (simulated network) -> vector write. +//! 2) Parallel: tree-sitter + SCIP parse in parallel -> unified graph write -> enqueue; +//! async embed workers batch results; vector writer applies batched writes. +//! +//! Goal: verify whether network latency dominates enough that async batching is the +//! right architecture choice. +//! +//! Usage: +//! cargo run --release --example index_pipeline_hypothesis_bench --no-default-features -- [options] +//! +//! Options: +//! --mode MODE sequential|parallel|both (default: both) +//! --changes N Number of change events (default: 20000) +//! --working-set N Distinct chunk keys reused by events (default: 2000) +//! --vector-dims N Vector dimensions (default: 128) +//! --tree-sitter-latency-ms N Simulated tree-sitter parse latency per event (default: 0) +//! --scip-latency-ms N Simulated SCIP parse latency per event (default: 0) +//! --embed-latency-ms N Simulated remote embedding latency per batch (default: 200) +//! --embed-batch-size N Embedding request batch size (default: 64) +//! --embed-flush-ms N Max wait to fill embed batch (default: 25) +//! --embed-inflight N Parallel embedding requests (default: 4) +//! 
--vector-apply-batch-size N Vector writes per DB transaction (default: 256) +//! --wal-size BYTES WAL size in bytes (default: 1073741824) +//! --sync-mode MODE Sync mode: full|normal|off (default: normal) +//! --group-commit-enabled Enable group commit (default: false) +//! --group-commit-window-ms N Group commit window in ms (default: 2) +//! --auto-checkpoint Enable auto-checkpoint (default: false) +//! --seed N RNG seed for event generation (default: 42) +//! --keep-db Keep generated DB files for inspection + +use std::collections::{HashMap, VecDeque}; +use std::env; +use std::path::PathBuf; +use std::sync::{Arc, Condvar, Mutex}; +use std::thread; +use std::time::{Duration, Instant}; + +use crossbeam_channel::{unbounded, Receiver, Sender}; +use rand::{rngs::StdRng, Rng, SeedableRng}; +use tempfile::tempdir; + +use kitedb::core::single_file::{ + close_single_file, open_single_file, SingleFileDB, SingleFileOpenOptions, SyncMode, +}; +use kitedb::types::{ETypeId, NodeId, PropKeyId, PropValue}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum Mode { + Sequential, + Parallel, + Both, +} + +#[derive(Debug, Clone)] +struct BenchConfig { + mode: Mode, + changes: usize, + working_set: usize, + vector_dims: usize, + tree_sitter_latency_ms: u64, + scip_latency_ms: u64, + embed_latency_ms: u64, + embed_batch_size: usize, + embed_flush_ms: u64, + embed_inflight: usize, + vector_apply_batch_size: usize, + wal_size: usize, + sync_mode: SyncMode, + group_commit_enabled: bool, + group_commit_window_ms: u64, + auto_checkpoint: bool, + seed: u64, + keep_db: bool, +} + +impl Default for BenchConfig { + fn default() -> Self { + Self { + mode: Mode::Both, + changes: 20_000, + working_set: 2_000, + vector_dims: 128, + tree_sitter_latency_ms: 0, + scip_latency_ms: 0, + embed_latency_ms: 200, + embed_batch_size: 64, + embed_flush_ms: 25, + embed_inflight: 4, + vector_apply_batch_size: 256, + wal_size: 1024 * 1024 * 1024, + sync_mode: SyncMode::Normal, + group_commit_enabled: false, 
+ group_commit_window_ms: 2, + auto_checkpoint: false, + seed: 42, + keep_db: false, + } + } +} + +#[derive(Debug, Clone)] +struct ChangeEvent { + chunk_idx: usize, + version: u64, +} + +#[derive(Debug, Clone)] +struct EmbedJob { + chunk_idx: usize, + version: u64, + hot_done_at: Instant, +} + +#[derive(Debug, Default)] +struct QueueStats { + enqueued_jobs: u64, + replaced_jobs: u64, + max_depth: usize, + depth_sum: u128, + depth_samples: u64, +} + +#[derive(Debug)] +struct EmbedQueueState { + pending_by_chunk: HashMap, + order: VecDeque, + closed: bool, + stats: QueueStats, +} + +impl EmbedQueueState { + fn new(capacity: usize) -> Self { + Self { + pending_by_chunk: HashMap::with_capacity(capacity), + order: VecDeque::with_capacity(capacity), + closed: false, + stats: QueueStats::default(), + } + } + + fn sample_depth(&mut self) { + let depth = self.pending_by_chunk.len(); + self.stats.max_depth = self.stats.max_depth.max(depth); + self.stats.depth_sum += depth as u128; + self.stats.depth_samples += 1; + } +} + +struct DbFixture { + db: Arc, + node_ids: Vec, + etype_rel: ETypeId, + node_rev_key: PropKeyId, + node_scip_rev_key: PropKeyId, + edge_weight_key: PropKeyId, + vector_key: PropKeyId, + db_path: PathBuf, + temp_dir: tempfile::TempDir, +} + +#[derive(Debug, Default)] +struct BenchResult { + mode: &'static str, + changes: usize, + applied_vectors: usize, + total_elapsed: Duration, + hot_path_elapsed: Duration, + hot_path_ns: Vec, + vector_freshness_ns: Vec, + enqueued_jobs: u64, + replaced_jobs: u64, + queue_max_depth: usize, + queue_avg_depth: f64, +} + +fn parse_args() -> BenchConfig { + let mut config = BenchConfig::default(); + let args: Vec = env::args().collect(); + let mut i = 1; + + while i < args.len() { + match args[i].as_str() { + "--mode" => { + if let Some(value) = args.get(i + 1) { + config.mode = match value.to_lowercase().as_str() { + "sequential" => Mode::Sequential, + "parallel" => Mode::Parallel, + _ => Mode::Both, + }; + i += 1; + } + } + 
"--changes" => { + if let Some(value) = args.get(i + 1) { + config.changes = value.parse().unwrap_or(config.changes); + i += 1; + } + } + "--working-set" => { + if let Some(value) = args.get(i + 1) { + config.working_set = value.parse().unwrap_or(config.working_set); + i += 1; + } + } + "--vector-dims" => { + if let Some(value) = args.get(i + 1) { + config.vector_dims = value.parse().unwrap_or(config.vector_dims); + i += 1; + } + } + "--tree-sitter-latency-ms" => { + if let Some(value) = args.get(i + 1) { + config.tree_sitter_latency_ms = value.parse().unwrap_or(config.tree_sitter_latency_ms); + i += 1; + } + } + "--scip-latency-ms" => { + if let Some(value) = args.get(i + 1) { + config.scip_latency_ms = value.parse().unwrap_or(config.scip_latency_ms); + i += 1; + } + } + "--embed-latency-ms" => { + if let Some(value) = args.get(i + 1) { + config.embed_latency_ms = value.parse().unwrap_or(config.embed_latency_ms); + i += 1; + } + } + "--embed-batch-size" => { + if let Some(value) = args.get(i + 1) { + config.embed_batch_size = value.parse().unwrap_or(config.embed_batch_size); + i += 1; + } + } + "--embed-flush-ms" => { + if let Some(value) = args.get(i + 1) { + config.embed_flush_ms = value.parse().unwrap_or(config.embed_flush_ms); + i += 1; + } + } + "--embed-inflight" => { + if let Some(value) = args.get(i + 1) { + config.embed_inflight = value.parse().unwrap_or(config.embed_inflight); + i += 1; + } + } + "--vector-apply-batch-size" => { + if let Some(value) = args.get(i + 1) { + config.vector_apply_batch_size = value.parse().unwrap_or(config.vector_apply_batch_size); + i += 1; + } + } + "--wal-size" => { + if let Some(value) = args.get(i + 1) { + config.wal_size = value.parse().unwrap_or(config.wal_size); + i += 1; + } + } + "--sync-mode" => { + if let Some(value) = args.get(i + 1) { + config.sync_mode = match value.to_lowercase().as_str() { + "full" => SyncMode::Full, + "off" => SyncMode::Off, + _ => SyncMode::Normal, + }; + i += 1; + } + } + 
"--group-commit-enabled" => { + config.group_commit_enabled = true; + } + "--group-commit-window-ms" => { + if let Some(value) = args.get(i + 1) { + config.group_commit_window_ms = value.parse().unwrap_or(config.group_commit_window_ms); + i += 1; + } + } + "--auto-checkpoint" => { + config.auto_checkpoint = true; + } + "--seed" => { + if let Some(value) = args.get(i + 1) { + config.seed = value.parse().unwrap_or(config.seed); + i += 1; + } + } + "--keep-db" => { + config.keep_db = true; + } + _ => {} + } + i += 1; + } + + if config.changes == 0 { + config.changes = 1; + } + if config.working_set == 0 { + config.working_set = 1; + } + if config.vector_dims == 0 { + config.vector_dims = 1; + } + if config.embed_batch_size == 0 { + config.embed_batch_size = 1; + } + if config.embed_inflight == 0 { + config.embed_inflight = 1; + } + if config.vector_apply_batch_size == 0 { + config.vector_apply_batch_size = 1; + } + + config +} + +fn generate_events(config: &BenchConfig) -> Vec { + let mut rng = StdRng::seed_from_u64(config.seed); + let mut versions = vec![0u64; config.working_set]; + let mut events = Vec::with_capacity(config.changes); + + for _ in 0..config.changes { + let chunk_idx = rng.gen_range(0..config.working_set); + versions[chunk_idx] += 1; + events.push(ChangeEvent { + chunk_idx, + version: versions[chunk_idx], + }); + } + + events +} + +fn format_rate(count: usize, elapsed: Duration) -> String { + let seconds = elapsed.as_secs_f64(); + if seconds <= 0.0 { + return "n/a".to_string(); + } + let rate = count as f64 / seconds; + if rate >= 1_000_000.0 { + return format!("{:.2}M/s", rate / 1_000_000.0); + } + if rate >= 1_000.0 { + return format!("{:.2}K/s", rate / 1_000.0); + } + format!("{rate:.2}/s") +} + +fn format_latency_ns(ns: u128) -> String { + if ns < 1_000 { + format!("{ns}ns") + } else if ns < 1_000_000 { + format!("{:.2}us", ns as f64 / 1_000.0) + } else if ns < 1_000_000_000 { + format!("{:.2}ms", ns as f64 / 1_000_000.0) + } else { + 
format!("{:.2}s", ns as f64 / 1_000_000_000.0) + } +} + +fn percentile_ns(samples: &[u128], percentile: f64) -> u128 { + if samples.is_empty() { + return 0; + } + let mut sorted = samples.to_vec(); + sorted.sort_unstable(); + let idx = ((sorted.len() as f64) * percentile).floor() as usize; + sorted[idx.min(sorted.len() - 1)] +} + +fn setup_fixture(config: &BenchConfig, label: &str) -> DbFixture { + let temp_dir = tempdir().expect("expected value"); + let db_path = temp_dir + .path() + .join(format!("index-pipeline-{label}.kitedb")); + + let open_opts = SingleFileOpenOptions::new() + .wal_size(config.wal_size) + .sync_mode(config.sync_mode) + .group_commit_enabled(config.group_commit_enabled) + .group_commit_window_ms(config.group_commit_window_ms) + .auto_checkpoint(config.auto_checkpoint); + + let db = open_single_file(&db_path, open_opts).expect("expected value"); + let db = Arc::new(db); + + db.begin(false).expect("expected value"); + let etype_rel = db.define_etype("REL").expect("expected value"); + let node_rev_key = db.define_propkey("rev").expect("expected value"); + let node_scip_rev_key = db.define_propkey("scip_rev").expect("expected value"); + let edge_weight_key = db.define_propkey("weight").expect("expected value"); + let vector_key = db.define_propkey("embedding").expect("expected value"); + db.commit().expect("expected value"); + + let mut node_ids = Vec::with_capacity(config.working_set); + let create_batch = 5000usize; + for start in (0..config.working_set).step_by(create_batch) { + let end = (start + create_batch).min(config.working_set); + db.begin_bulk().expect("expected value"); + let mut keys = Vec::with_capacity(end - start); + for idx in start..end { + keys.push(format!("chunk:{idx}")); + } + let key_refs: Vec> = keys.iter().map(|k| Some(k.as_str())).collect(); + let ids = db.create_nodes_batch(&key_refs).expect("expected value"); + node_ids.extend(ids); + db.commit().expect("expected value"); + } + + let edge_batch = 10_000usize; + for 
start in (0..config.working_set).step_by(edge_batch) { + let end = (start + edge_batch).min(config.working_set); + db.begin_bulk().expect("expected value"); + let mut edges = Vec::with_capacity(end - start); + for idx in start..end { + let src = node_ids[idx]; + let dst = node_ids[(idx + 1) % node_ids.len()]; + edges.push((src, etype_rel, dst)); + } + db.add_edges_batch(&edges).expect("expected value"); + db.commit().expect("expected value"); + } + + db.vector_store_or_create(vector_key, config.vector_dims) + .expect("expected value"); + + DbFixture { + db, + node_ids, + etype_rel, + node_rev_key, + node_scip_rev_key, + edge_weight_key, + vector_key, + db_path, + temp_dir, + } +} + +fn apply_graph_change_ts_tx(fixture: &DbFixture, event: &ChangeEvent) { + let src = fixture.node_ids[event.chunk_idx]; + let dst = fixture.node_ids[(event.chunk_idx + 1) % fixture.node_ids.len()]; + + fixture.db.begin(false).expect("expected value"); + fixture + .db + .set_node_prop( + src, + fixture.node_rev_key, + PropValue::I64(event.version as i64), + ) + .expect("expected value"); + fixture + .db + .set_edge_prop( + src, + fixture.etype_rel, + dst, + fixture.edge_weight_key, + PropValue::F64((event.version % 1024) as f64 / 1024.0), + ) + .expect("expected value"); + fixture.db.commit().expect("expected value"); +} + +fn apply_graph_change_scip_tx(fixture: &DbFixture, event: &ChangeEvent) { + let src = fixture.node_ids[event.chunk_idx]; + + fixture.db.begin(false).expect("expected value"); + fixture + .db + .set_node_prop( + src, + fixture.node_scip_rev_key, + PropValue::I64(event.version as i64), + ) + .expect("expected value"); + fixture.db.commit().expect("expected value"); +} + +fn apply_graph_change_unified_tx(fixture: &DbFixture, event: &ChangeEvent) { + let src = fixture.node_ids[event.chunk_idx]; + let dst = fixture.node_ids[(event.chunk_idx + 1) % fixture.node_ids.len()]; + + fixture.db.begin(false).expect("expected value"); + fixture + .db + .set_node_prop( + src, + 
fixture.node_rev_key, + PropValue::I64(event.version as i64), + ) + .expect("expected value"); + fixture + .db + .set_node_prop( + src, + fixture.node_scip_rev_key, + PropValue::I64(event.version as i64), + ) + .expect("expected value"); + fixture + .db + .set_edge_prop( + src, + fixture.etype_rel, + dst, + fixture.edge_weight_key, + PropValue::F64((event.version % 1024) as f64 / 1024.0), + ) + .expect("expected value"); + fixture.db.commit().expect("expected value"); +} + +fn apply_vector_batch( + fixture: &DbFixture, + dims: usize, + jobs: &[EmbedJob], + freshness_samples: &mut Vec, +) { + if jobs.is_empty() { + return; + } + + fixture.db.begin(false).expect("expected value"); + for job in jobs { + let node_id = fixture.node_ids[job.chunk_idx]; + let value = (job.version % 1024) as f32 / 1024.0; + let vector = vec![value; dims]; + fixture + .db + .set_node_vector(node_id, fixture.vector_key, &vector) + .expect("expected value"); + } + fixture.db.commit().expect("expected value"); + + let now = Instant::now(); + for job in jobs { + freshness_samples.push(now.duration_since(job.hot_done_at).as_nanos()); + } +} + +fn run_sequential(config: &BenchConfig, events: &[ChangeEvent]) -> BenchResult { + let fixture = setup_fixture(config, "sequential"); + let run_start = Instant::now(); + let mut hot_path_ns = Vec::with_capacity(events.len()); + let mut vector_freshness_ns = Vec::with_capacity(events.len()); + let ts_sleep = Duration::from_millis(config.tree_sitter_latency_ms); + let scip_sleep = Duration::from_millis(config.scip_latency_ms); + let embed_sleep = Duration::from_millis(config.embed_latency_ms); + let mut last_hot_done = run_start; + + for event in events { + let op_start = Instant::now(); + if config.tree_sitter_latency_ms > 0 { + thread::sleep(ts_sleep); + } + apply_graph_change_ts_tx(&fixture, event); + if config.scip_latency_ms > 0 { + thread::sleep(scip_sleep); + } + apply_graph_change_scip_tx(&fixture, event); + let hot_done = Instant::now(); + 
last_hot_done = hot_done; + hot_path_ns.push(hot_done.duration_since(op_start).as_nanos()); + + if config.embed_latency_ms > 0 { + thread::sleep(embed_sleep); + } + let job = EmbedJob { + chunk_idx: event.chunk_idx, + version: event.version, + hot_done_at: hot_done, + }; + apply_vector_batch( + &fixture, + config.vector_dims, + &[job], + &mut vector_freshness_ns, + ); + } + + let total_elapsed = run_start.elapsed(); + let hot_path_elapsed = last_hot_done.duration_since(run_start); + + if config.keep_db { + println!("Sequential DB kept at: {}", fixture.db_path.display()); + std::mem::forget(fixture.temp_dir); + } + + if let Ok(db) = Arc::try_unwrap(fixture.db) { + close_single_file(db).expect("expected value"); + } else { + println!("Warning: failed to unwrap DB Arc; skipping explicit close"); + } + + BenchResult { + mode: "sequential", + changes: events.len(), + applied_vectors: vector_freshness_ns.len(), + total_elapsed, + hot_path_elapsed, + hot_path_ns, + vector_freshness_ns, + ..BenchResult::default() + } +} + +fn enqueue_job( + queue: &Arc<(Mutex, Condvar)>, + chunk_capacity: usize, + job: EmbedJob, +) { + let (lock, cv) = &**queue; + let mut state = lock.lock().expect("expected value"); + + if state.pending_by_chunk.capacity() == 0 { + state.pending_by_chunk.reserve(chunk_capacity); + } + + state.stats.enqueued_jobs += 1; + let chunk_idx = job.chunk_idx; + if state.pending_by_chunk.insert(chunk_idx, job).is_some() { + state.stats.replaced_jobs += 1; + } else { + state.order.push_back(chunk_idx); + } + state.sample_depth(); + cv.notify_one(); +} + +fn take_embed_batch( + queue: &Arc<(Mutex, Condvar)>, + batch_size: usize, + flush_window: Duration, +) -> Option> { + let (lock, cv) = &**queue; + let mut state = lock.lock().expect("expected value"); + + loop { + while state.order.is_empty() && !state.closed { + state = cv.wait(state).expect("expected value"); + } + + if state.order.is_empty() && state.closed { + return None; + } + + if !flush_window.is_zero() && 
state.order.len() < batch_size && !state.closed { + let (next_state, _) = cv + .wait_timeout(state, flush_window) + .expect("expected value"); + state = next_state; + if state.order.is_empty() && state.closed { + return None; + } + } + + let mut batch = Vec::with_capacity(batch_size); + while batch.len() < batch_size { + let Some(chunk_idx) = state.order.pop_front() else { + break; + }; + if let Some(job) = state.pending_by_chunk.remove(&chunk_idx) { + batch.push(job); + state.sample_depth(); + } + } + + if !batch.is_empty() { + return Some(batch); + } + + if state.closed { + return None; + } + } +} + +fn run_parallel(config: &BenchConfig, events: &[ChangeEvent]) -> BenchResult { + let fixture = setup_fixture(config, "parallel"); + let run_start = Instant::now(); + let mut hot_path_ns = Vec::with_capacity(events.len()); + let ts_sleep = Duration::from_millis(config.tree_sitter_latency_ms); + let scip_sleep = Duration::from_millis(config.scip_latency_ms); + let embed_sleep = Duration::from_millis(config.embed_latency_ms); + let embed_flush = Duration::from_millis(config.embed_flush_ms); + let mut last_hot_done = run_start; + + let queue = Arc::new(( + Mutex::new(EmbedQueueState::new(config.working_set)), + Condvar::new(), + )); + let (result_tx, result_rx): (Sender>, Receiver>) = unbounded(); + + let mut embed_handles = Vec::with_capacity(config.embed_inflight); + for _ in 0..config.embed_inflight { + let queue = Arc::clone(&queue); + let tx = result_tx.clone(); + let batch_size = config.embed_batch_size; + let embed_sleep = embed_sleep; + let embed_flush = embed_flush; + embed_handles.push(thread::spawn(move || { + while let Some(batch) = take_embed_batch(&queue, batch_size, embed_flush) { + if !embed_sleep.is_zero() { + thread::sleep(embed_sleep); + } + if tx.send(batch).is_err() { + return; + } + } + })); + } + drop(result_tx); + + let writer_db = Arc::clone(&fixture.db); + let writer_node_ids = fixture.node_ids.clone(); + let vector_key = fixture.vector_key; + 
let dims = config.vector_dims; + let apply_batch_size = config.vector_apply_batch_size; + let writer_handle = thread::spawn(move || { + let mut apply_buffer: Vec = Vec::with_capacity(apply_batch_size * 2); + let mut freshness = Vec::new(); + let mut applied = 0usize; + + for mut batch in result_rx { + apply_buffer.append(&mut batch); + while apply_buffer.len() >= apply_batch_size { + let chunk: Vec = apply_buffer.drain(..apply_batch_size).collect(); + writer_db.begin(false).expect("expected value"); + for job in &chunk { + let node_id = writer_node_ids[job.chunk_idx]; + let value = (job.version % 1024) as f32 / 1024.0; + let vector = vec![value; dims]; + writer_db + .set_node_vector(node_id, vector_key, &vector) + .expect("expected value"); + } + writer_db.commit().expect("expected value"); + let now = Instant::now(); + for job in &chunk { + freshness.push(now.duration_since(job.hot_done_at).as_nanos()); + } + applied += chunk.len(); + } + } + + if !apply_buffer.is_empty() { + writer_db.begin(false).expect("expected value"); + for job in &apply_buffer { + let node_id = writer_node_ids[job.chunk_idx]; + let value = (job.version % 1024) as f32 / 1024.0; + let vector = vec![value; dims]; + writer_db + .set_node_vector(node_id, vector_key, &vector) + .expect("expected value"); + } + writer_db.commit().expect("expected value"); + let now = Instant::now(); + for job in &apply_buffer { + freshness.push(now.duration_since(job.hot_done_at).as_nanos()); + } + applied += apply_buffer.len(); + } + + (freshness, applied) + }); + + for event in events { + let op_start = Instant::now(); + if config.tree_sitter_latency_ms > 0 || config.scip_latency_ms > 0 { + let parse_parallel_sleep = ts_sleep.max(scip_sleep); + thread::sleep(parse_parallel_sleep); + } + apply_graph_change_unified_tx(&fixture, event); + let hot_done = Instant::now(); + last_hot_done = hot_done; + hot_path_ns.push(hot_done.duration_since(op_start).as_nanos()); + + enqueue_job( + &queue, + config.working_set, + 
EmbedJob { + chunk_idx: event.chunk_idx, + version: event.version, + hot_done_at: hot_done, + }, + ); + } + + { + let (lock, cv) = &*queue; + let mut state = lock.lock().expect("expected value"); + state.closed = true; + cv.notify_all(); + } + + for handle in embed_handles { + handle.join().expect("expected value"); + } + + let (vector_freshness_ns, applied_vectors) = writer_handle.join().expect("expected value"); + let total_elapsed = run_start.elapsed(); + let hot_path_elapsed = last_hot_done.duration_since(run_start); + + let (enqueued_jobs, replaced_jobs, queue_max_depth, queue_avg_depth) = { + let (lock, _) = &*queue; + let state = lock.lock().expect("expected value"); + let samples = state.stats.depth_samples.max(1); + ( + state.stats.enqueued_jobs, + state.stats.replaced_jobs, + state.stats.max_depth, + state.stats.depth_sum as f64 / samples as f64, + ) + }; + + if config.keep_db { + println!("Parallel DB kept at: {}", fixture.db_path.display()); + std::mem::forget(fixture.temp_dir); + } + + if let Ok(db) = Arc::try_unwrap(fixture.db) { + close_single_file(db).expect("expected value"); + } else { + println!("Warning: failed to unwrap DB Arc; skipping explicit close"); + } + + BenchResult { + mode: "parallel", + changes: events.len(), + applied_vectors, + total_elapsed, + hot_path_elapsed, + hot_path_ns, + vector_freshness_ns, + enqueued_jobs, + replaced_jobs, + queue_max_depth, + queue_avg_depth, + } +} + +fn print_result(result: &BenchResult) { + let hot_p50 = percentile_ns(&result.hot_path_ns, 0.50); + let hot_p95 = percentile_ns(&result.hot_path_ns, 0.95); + let hot_p99 = percentile_ns(&result.hot_path_ns, 0.99); + let fresh_p50 = percentile_ns(&result.vector_freshness_ns, 0.50); + let fresh_p95 = percentile_ns(&result.vector_freshness_ns, 0.95); + let fresh_p99 = percentile_ns(&result.vector_freshness_ns, 0.99); + let hot_rate = format_rate(result.changes, result.hot_path_elapsed); + let end_to_end_rate = format_rate(result.changes, 
result.total_elapsed); + + println!("\n--- {} ---", result.mode); + println!("Changes: {}", result.changes); + println!("Vectors applied: {}", result.applied_vectors); + println!( + "Hot path elapsed: {:.3}s", + result.hot_path_elapsed.as_secs_f64() + ); + println!("Total elapsed: {:.3}s", result.total_elapsed.as_secs_f64()); + println!("Hot path rate: {hot_rate}"); + println!("End-to-end rate: {end_to_end_rate}"); + println!( + "Hot path latency: p50={} p95={} p99={}", + format_latency_ns(hot_p50), + format_latency_ns(hot_p95), + format_latency_ns(hot_p99) + ); + println!( + "Vector freshness: p50={} p95={} p99={}", + format_latency_ns(fresh_p50), + format_latency_ns(fresh_p95), + format_latency_ns(fresh_p99) + ); + + if result.mode == "parallel" { + let replace_rate = if result.enqueued_jobs > 0 { + (result.replaced_jobs as f64 / result.enqueued_jobs as f64) * 100.0 + } else { + 0.0 + }; + println!( + "Queue: enqueued={} replaced={} ({replace_rate:.2}%) max_depth={} avg_depth={:.2}", + result.enqueued_jobs, result.replaced_jobs, result.queue_max_depth, result.queue_avg_depth + ); + } +} + +fn print_comparison(seq: &BenchResult, par: &BenchResult) { + let seq_hot_p95 = percentile_ns(&seq.hot_path_ns, 0.95); + let par_hot_p95 = percentile_ns(&par.hot_path_ns, 0.95); + let seq_fresh_p95 = percentile_ns(&seq.vector_freshness_ns, 0.95); + let par_fresh_p95 = percentile_ns(&par.vector_freshness_ns, 0.95); + + let hot_gain = if par.hot_path_elapsed.as_nanos() > 0 { + seq.hot_path_elapsed.as_secs_f64() / par.hot_path_elapsed.as_secs_f64() + } else { + 0.0 + }; + let end_to_end_gain = if par.total_elapsed.as_nanos() > 0 { + seq.total_elapsed.as_secs_f64() / par.total_elapsed.as_secs_f64() + } else { + 0.0 + }; + + println!("\n=== Comparison (sequential vs parallel) ==="); + println!("Hot path elapsed speedup: {hot_gain:.2}x"); + println!("End-to-end elapsed speedup: {end_to_end_gain:.2}x"); + println!( + "Hot p95: {} -> {}", + format_latency_ns(seq_hot_p95), + 
format_latency_ns(par_hot_p95) + ); + println!( + "Freshness p95: {} -> {}", + format_latency_ns(seq_fresh_p95), + format_latency_ns(par_fresh_p95) + ); +} + +fn main() { + let config = parse_args(); + let events = generate_events(&config); + + println!("=================================================================="); + println!("Index Pipeline Hypothesis Benchmark"); + println!("=================================================================="); + println!("Mode: {:?}", config.mode); + println!("Changes: {}", config.changes); + println!("Working set: {}", config.working_set); + println!("Vector dims: {}", config.vector_dims); + println!( + "Parse latency: tree-sitter={}ms scip={}ms", + config.tree_sitter_latency_ms, config.scip_latency_ms + ); + println!("Embed latency: {}ms per batch", config.embed_latency_ms); + println!( + "Embed batching: size={} flush={}ms inflight={}", + config.embed_batch_size, config.embed_flush_ms, config.embed_inflight + ); + println!( + "Vector apply batch size: {}", + config.vector_apply_batch_size + ); + println!("WAL size: {} bytes", config.wal_size); + println!("Sync mode: {:?}", config.sync_mode); + println!( + "Group commit: {} (window {}ms)", + config.group_commit_enabled, config.group_commit_window_ms + ); + println!("Auto-checkpoint: {}", config.auto_checkpoint); + println!("Seed: {}", config.seed); + println!("=================================================================="); + + let mut seq_result: Option = None; + let mut par_result: Option = None; + + match config.mode { + Mode::Sequential => { + let result = run_sequential(&config, &events); + print_result(&result); + seq_result = Some(result); + } + Mode::Parallel => { + let result = run_parallel(&config, &events); + print_result(&result); + par_result = Some(result); + } + Mode::Both => { + let seq = run_sequential(&config, &events); + print_result(&seq); + let par = run_parallel(&config, &events); + print_result(&par); + seq_result = Some(seq); + par_result = 
Some(par); + } + } + + if let (Some(seq), Some(par)) = (seq_result.as_ref(), par_result.as_ref()) { + print_comparison(seq, par); + } +} diff --git a/ray-rs/examples/ray_vs_memgraph_bench.rs b/ray-rs/examples/ray_vs_memgraph_bench.rs new file mode 100644 index 0000000..3abfece --- /dev/null +++ b/ray-rs/examples/ray_vs_memgraph_bench.rs @@ -0,0 +1,568 @@ +//! RayDB vs Memgraph 1-hop traversal benchmark (Rust). +//! +//! Workload: +//! - Build the same graph in both engines +//! - Default: 10k nodes, 20k edges +//! - Query equivalent to `from(alice).out(KNOWS).toArray()` +//! - Alice fan-out defaults to 10 (configurable in 5-20 range) +//! +//! Usage: +//! cargo run --release --example ray_vs_memgraph_bench --no-default-features -- \ +//! --nodes 10000 --edges 20000 --query-results 10 --iterations 5000 + +use std::collections::HashSet; +use std::env; +use std::error::Error; +use std::time::Instant; + +use neo4rs::{query, ConfigBuilder, Graph}; +use rand::{rngs::StdRng, Rng, SeedableRng}; +use tempfile::{tempdir, TempDir}; + +use kitedb::api::kite::{EdgeDef, Kite, KiteOptions, NodeDef}; +use kitedb::core::single_file::{ + close_single_file, open_single_file, SingleFileOpenOptions, SyncMode, +}; +use kitedb::types::{ETypeId, NodeId}; + +#[derive(Debug, Clone)] +struct BenchConfig { + nodes: usize, + edges: usize, + query_results: usize, + iterations: usize, + warmup: usize, + seed: u64, + batch_size: usize, + memgraph_uri: String, + memgraph_user: String, + memgraph_password: String, + keep_db: bool, +} + +impl Default for BenchConfig { + fn default() -> Self { + Self { + nodes: 10_000, + edges: 20_000, + query_results: 10, + iterations: 5_000, + warmup: 500, + seed: 42, + batch_size: 1_000, + memgraph_uri: "127.0.0.1:7687".to_string(), + memgraph_user: String::new(), + memgraph_password: String::new(), + keep_db: false, + } + } +} + +#[derive(Debug, Clone, Copy)] +struct LatencyStats { + count: usize, + max: u128, + sum: u128, + p50: u128, + p95: u128, + p99: u128, 
+} + +fn compute_stats(samples: &mut [u128]) -> LatencyStats { + if samples.is_empty() { + return LatencyStats { + count: 0, + max: 0, + sum: 0, + p50: 0, + p95: 0, + p99: 0, + }; + } + + samples.sort_unstable(); + let count = samples.len(); + let max = samples[count - 1]; + let sum: u128 = samples.iter().copied().sum(); + let p50 = samples[(count as f64 * 0.50).floor() as usize]; + let p95 = samples[(count as f64 * 0.95).floor() as usize]; + let p99 = samples[(count as f64 * 0.99).floor() as usize]; + + LatencyStats { + count, + max, + sum, + p50, + p95, + p99, + } +} + +fn parse_args() -> Result { + let mut cfg = BenchConfig::default(); + let args: Vec = env::args().collect(); + + let mut i = 1; + while i < args.len() { + match args[i].as_str() { + "--nodes" => { + i += 1; + cfg.nodes = args + .get(i) + .ok_or("--nodes requires value")? + .parse() + .map_err(|_| "invalid --nodes")?; + } + "--edges" => { + i += 1; + cfg.edges = args + .get(i) + .ok_or("--edges requires value")? + .parse() + .map_err(|_| "invalid --edges")?; + } + "--query-results" => { + i += 1; + cfg.query_results = args + .get(i) + .ok_or("--query-results requires value")? + .parse() + .map_err(|_| "invalid --query-results")?; + } + "--iterations" => { + i += 1; + cfg.iterations = args + .get(i) + .ok_or("--iterations requires value")? + .parse() + .map_err(|_| "invalid --iterations")?; + } + "--warmup" => { + i += 1; + cfg.warmup = args + .get(i) + .ok_or("--warmup requires value")? + .parse() + .map_err(|_| "invalid --warmup")?; + } + "--seed" => { + i += 1; + cfg.seed = args + .get(i) + .ok_or("--seed requires value")? + .parse() + .map_err(|_| "invalid --seed")?; + } + "--batch-size" => { + i += 1; + cfg.batch_size = args + .get(i) + .ok_or("--batch-size requires value")? + .parse() + .map_err(|_| "invalid --batch-size")?; + } + "--memgraph-uri" => { + i += 1; + cfg.memgraph_uri = args + .get(i) + .ok_or("--memgraph-uri requires value")? 
+ .to_string(); + } + "--memgraph-user" => { + i += 1; + cfg.memgraph_user = args + .get(i) + .ok_or("--memgraph-user requires value")? + .to_string(); + } + "--memgraph-password" => { + i += 1; + cfg.memgraph_password = args + .get(i) + .ok_or("--memgraph-password requires value")? + .to_string(); + } + "--keep-db" => { + cfg.keep_db = true; + } + "--help" | "-h" => { + print_help(); + std::process::exit(0); + } + other => return Err(format!("unknown argument: {other}")), + } + i += 1; + } + + if cfg.nodes < 2 { + return Err("--nodes must be >= 2".to_string()); + } + if cfg.edges < 1 { + return Err("--edges must be >= 1".to_string()); + } + if cfg.query_results < 1 { + return Err("--query-results must be >= 1".to_string()); + } + if cfg.query_results >= cfg.nodes { + return Err("--query-results must be < --nodes".to_string()); + } + if cfg.query_results > cfg.edges { + return Err("--query-results must be <= --edges".to_string()); + } + if cfg.iterations < 1 { + return Err("--iterations must be >= 1".to_string()); + } + if cfg.batch_size < 1 { + return Err("--batch-size must be >= 1".to_string()); + } + + Ok(cfg) +} + +fn print_help() { + println!("RayDB vs Memgraph traversal benchmark"); + println!(); + println!("Options:"); + println!(" --nodes N Number of nodes (default: 10000)"); + println!(" --edges N Number of edges (default: 20000)"); + println!(" --query-results N Alice outgoing neighbors (default: 10)"); + println!(" --iterations N Timed query iterations (default: 5000)"); + println!(" --warmup N Warmup iterations (default: 500)"); + println!(" --seed N RNG seed (default: 42)"); + println!(" --batch-size N Batch size for ingest (default: 1000)"); + println!(" --memgraph-uri URI Memgraph Bolt URI (default: 127.0.0.1:7687)"); + println!(" --memgraph-user USER Memgraph username (default: empty)"); + println!(" --memgraph-password P Memgraph password (default: empty)"); + println!(" --keep-db Keep local RayDB file"); +} + +fn format_latency(ns: u128) -> String 
{ + if ns < 1_000 { + return format!("{ns}ns"); + } + if ns < 1_000_000 { + return format!("{:.2}us", ns as f64 / 1_000.0); + } + format!("{:.2}ms", ns as f64 / 1_000_000.0) +} + +fn format_number(n: usize) -> String { + let s = n.to_string(); + let mut out = String::new(); + for (count, ch) in s.chars().rev().enumerate() { + if count > 0 && count % 3 == 0 { + out.push(','); + } + out.push(ch); + } + out.chars().rev().collect() +} + +fn print_stats(name: &str, stats: LatencyStats) { + let ops = if stats.sum > 0 { + stats.count as f64 / (stats.sum as f64 / 1_000_000_000.0) + } else { + 0.0 + }; + println!( + "{:<10} p50={:>10} p95={:>10} p99={:>10} max={:>10} ({:.0} ops/sec)", + name, + format_latency(stats.p50), + format_latency(stats.p95), + format_latency(stats.p99), + format_latency(stats.max), + ops + ); +} + +fn build_workload(cfg: &BenchConfig) -> (Vec, Vec<(usize, usize)>) { + let mut keys = Vec::with_capacity(cfg.nodes); + keys.push("user:alice".to_string()); + for i in 1..cfg.nodes { + keys.push(format!("user:u{i}")); + } + + let mut edges: HashSet<(usize, usize)> = HashSet::with_capacity(cfg.edges * 2); + for dst in 1..=cfg.query_results { + edges.insert((0, dst)); + } + + let mut rng = StdRng::seed_from_u64(cfg.seed); + while edges.len() < cfg.edges { + let src = rng.gen_range(1..cfg.nodes); // keep alice fan-out fixed + let dst = rng.gen_range(0..cfg.nodes); + if src != dst { + edges.insert((src, dst)); + } + } + + (keys, edges.into_iter().collect()) +} + +fn ingest_raydb( + raydb_path: &std::path::Path, + cfg: &BenchConfig, + keys: &[String], + edges: &[(usize, usize)], +) -> Result<(u128, ETypeId), Box> { + let started = Instant::now(); + let options = SingleFileOpenOptions::new() + .sync_mode(SyncMode::Normal) + .create_if_missing(true); + let db = open_single_file(raydb_path, options)?; + + db.begin_bulk()?; + let knows = db.define_etype("KNOWS")?; + db.commit()?; + let mut node_ids: Vec = Vec::with_capacity(keys.len()); + + for start in 
(0..keys.len()).step_by(cfg.batch_size) { + let end = (start + cfg.batch_size).min(keys.len()); + db.begin_bulk()?; + let key_refs: Vec> = keys[start..end].iter().map(|k| Some(k.as_str())).collect(); + let batch_ids = db.create_nodes_batch(&key_refs)?; + node_ids.extend(batch_ids); + db.commit()?; + } + + for start in (0..edges.len()).step_by(cfg.batch_size) { + let end = (start + cfg.batch_size).min(edges.len()); + let mut batch = Vec::with_capacity(end - start); + for (src_index, dst_index) in &edges[start..end] { + batch.push((node_ids[*src_index], knows, node_ids[*dst_index])); + } + db.begin_bulk()?; + db.add_edges_batch(&batch)?; + db.commit()?; + } + + close_single_file(db)?; + Ok((started.elapsed().as_millis(), knows)) +} + +fn benchmark_raydb_query( + raydb_path: &std::path::Path, + cfg: &BenchConfig, +) -> Result<(LatencyStats, usize), Box> { + let user = NodeDef::new("User", "user:"); + let knows = EdgeDef::new("KNOWS"); + let options = KiteOptions::new() + .node(user) + .edge(knows) + .sync_mode(SyncMode::Normal); + let kite = Kite::open(raydb_path, options)?; + let alice = kite + .raw() + .node_by_key("user:alice") + .ok_or("missing alice in RayDB")?; + + for _ in 0..cfg.warmup { + let _ = kite.from(alice).out(Some("KNOWS"))?.to_vec(); + } + + let mut samples = Vec::with_capacity(cfg.iterations); + let mut result_count = 0usize; + + for _ in 0..cfg.iterations { + let start = Instant::now(); + let rows = kite.from(alice).out(Some("KNOWS"))?.to_vec(); + samples.push(start.elapsed().as_nanos()); + result_count = rows.len(); + } + + kite.close()?; + Ok((compute_stats(&mut samples), result_count)) +} + +fn normalize_memgraph_uri(uri: &str) -> String { + uri + .trim_start_matches("bolt://") + .trim_start_matches("neo4j://") + .to_string() +} + +fn cypher_quote(value: &str) -> String { + value.replace('\\', "\\\\").replace('\'', "\\'") +} + +async fn memgraph_connect(cfg: &BenchConfig) -> Result> { + let config = ConfigBuilder::default() + 
.uri(&normalize_memgraph_uri(&cfg.memgraph_uri)) + .user(&cfg.memgraph_user) + .password(&cfg.memgraph_password) + .db("memgraph") + .fetch_size(1000) + .max_connections(8) + .build()?; + Ok(Graph::connect(config).await?) +} + +async fn memgraph_run(graph: &Graph, q: &str) -> Result<(), Box> { + graph.run(query(q)).await?; + Ok(()) +} + +async fn memgraph_count_rows(graph: &Graph, q: &str) -> Result> { + let mut rows = graph.execute(query(q)).await?; + let mut count = 0usize; + loop { + match rows.next().await { + Ok(Some(_)) => count += 1, + Ok(None) => break, + Err(err) => return Err(Box::new(err)), + } + } + Ok(count) +} + +async fn ingest_memgraph( + graph: &Graph, + cfg: &BenchConfig, + keys: &[String], + edges: &[(usize, usize)], +) -> Result> { + let started = Instant::now(); + + memgraph_run(graph, "MATCH (n) DETACH DELETE n").await?; + let _ = memgraph_run(graph, "CREATE INDEX ON :User(key)").await; + + for start in (0..keys.len()).step_by(cfg.batch_size) { + let end = (start + cfg.batch_size).min(keys.len()); + let list = keys[start..end] + .iter() + .map(|k| format!("'{}'", cypher_quote(k))) + .collect::>() + .join(", "); + let q = format!("UNWIND [{list}] AS key CREATE (:User {{key: key}})"); + memgraph_run(graph, &q).await?; + } + + for start in (0..edges.len()).step_by(cfg.batch_size) { + let end = (start + cfg.batch_size).min(edges.len()); + let pairs = edges[start..end] + .iter() + .map(|(src, dst)| { + format!( + "['{}','{}']", + cypher_quote(&keys[*src]), + cypher_quote(&keys[*dst]) + ) + }) + .collect::>() + .join(", "); + + let q = format!( + "UNWIND [{pairs}] AS pair \ + MATCH (s:User {{key: pair[0]}}) \ + MATCH (d:User {{key: pair[1]}}) \ + CREATE (s)-[:KNOWS]->(d)" + ); + memgraph_run(graph, &q).await?; + } + + Ok(started.elapsed().as_millis()) +} + +async fn benchmark_memgraph_query( + graph: &Graph, + cfg: &BenchConfig, +) -> Result<(LatencyStats, usize), Box> { + let q = "MATCH (a:User {key: 'user:alice'})-[:KNOWS]->(b) RETURN b.key AS 
key"; + + for _ in 0..cfg.warmup { + let _ = memgraph_count_rows(graph, q).await?; + } + + let mut samples = Vec::with_capacity(cfg.iterations); + let mut result_count = 0usize; + for _ in 0..cfg.iterations { + let start = Instant::now(); + result_count = memgraph_count_rows(graph, q).await?; + samples.push(start.elapsed().as_nanos()); + } + + Ok((compute_stats(&mut samples), result_count)) +} + +async fn async_main() -> Result<(), Box> { + let cfg = parse_args().map_err(|e| format!("argument error: {e}"))?; + let (keys, edges) = build_workload(&cfg); + + let temp = tempdir()?; + let raydb_path = temp.path().join("ray-vs-memgraph.kitedb"); + + println!("RayDB vs Memgraph: 1-hop traversal"); + println!("Nodes: {}", format_number(cfg.nodes)); + println!("Edges: {}", format_number(cfg.edges)); + println!("Alice expected results: {}", cfg.query_results); + println!( + "Iterations: {} (warmup {})", + format_number(cfg.iterations), + format_number(cfg.warmup) + ); + println!(); + + let (ray_ingest_ms, _knows_id) = ingest_raydb(&raydb_path, &cfg, &keys, &edges)?; + let graph = memgraph_connect(&cfg).await?; + let memgraph_ingest_ms = ingest_memgraph(&graph, &cfg, &keys, &edges).await?; + + let (ray_stats, ray_count) = benchmark_raydb_query(&raydb_path, &cfg)?; + let (mem_stats, mem_count) = benchmark_memgraph_query(&graph, &cfg).await?; + + if ray_count != cfg.query_results { + return Err( + format!( + "RayDB result mismatch: got {}, expected {}", + ray_count, cfg.query_results + ) + .into(), + ); + } + if mem_count != cfg.query_results { + return Err( + format!( + "Memgraph result mismatch: got {}, expected {}", + mem_count, cfg.query_results + ) + .into(), + ); + } + + println!("Setup times (not included in query latency):"); + println!(" RayDB ingest: {:.2}ms", ray_ingest_ms as f64); + println!(" Memgraph ingest: {:.2}ms", memgraph_ingest_ms as f64); + println!(); + println!("Query latency (from(alice).out(KNOWS).toArray equivalent):"); + print_stats("RayDB", 
ray_stats); + print_stats("Memgraph", mem_stats); + + if ray_stats.p50 > 0 && ray_stats.p95 > 0 { + println!(); + println!( + "Memgraph/RayDB ratio: p50={:.2}x p95={:.2}x", + mem_stats.p50 as f64 / ray_stats.p50 as f64, + mem_stats.p95 as f64 / ray_stats.p95 as f64 + ); + } + + if cfg.keep_db { + persist_temp(temp, &raydb_path)?; + } + + Ok(()) +} + +fn persist_temp(temp: TempDir, raydb_path: &std::path::Path) -> Result<(), Box> { + let keep_dir = temp.keep(); + println!(); + println!("RayDB dataset kept at: {}", raydb_path.display()); + println!("Temp dir: {}", keep_dir.display()); + Ok(()) +} + +fn main() -> Result<(), Box> { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build()?; + rt.block_on(async_main()) +} diff --git a/ray-rs/examples/replication_catchup_bench.rs b/ray-rs/examples/replication_catchup_bench.rs new file mode 100644 index 0000000..d0ae3ef --- /dev/null +++ b/ray-rs/examples/replication_catchup_bench.rs @@ -0,0 +1,285 @@ +//! Replication catch-up throughput benchmark. +//! +//! Usage: +//! cargo run --release --example replication_catchup_bench --no-default-features -- [options] +//! +//! Options: +//! --seed-commits N Commits before replica bootstrap (default: 1000) +//! --backlog-commits N Commits generated after bootstrap, then caught up (default: 5000) +//! --max-frames N Max frames per catch-up pull (default: 256) +//! --sync-mode MODE Sync mode: full|normal|off (default: normal) +//! --segment-max-bytes N Segment rotation threshold (default: 67108864) +//! 
--retention-min N      Retention minimum entries (default: 20000)

use std::env;
use std::time::{Duration, Instant};

use tempfile::tempdir;

use kitedb::core::single_file::{
    close_single_file, open_single_file, SingleFileDB, SingleFileOpenOptions, SyncMode,
};
use kitedb::replication::types::ReplicationRole;

/// CLI-tunable benchmark parameters.
#[derive(Debug, Clone)]
struct BenchConfig {
    seed_commits: usize,          // commits written before the replica bootstraps
    backlog_commits: usize,       // commits the replica must later catch up on
    max_frames: usize,            // frames per replica_catch_up_once pull
    sync_mode: SyncMode,
    segment_max_bytes: u64,       // primary sidecar segment rotation threshold
    retention_min_entries: u64,   // primary log retention floor
}

impl Default for BenchConfig {
    fn default() -> Self {
        Self {
            seed_commits: 1000,
            backlog_commits: 5000,
            max_frames: 256,
            sync_mode: SyncMode::Normal,
            segment_max_bytes: 64 * 1024 * 1024,
            retention_min_entries: 20_000,
        }
    }
}

/// Consume the value following a flag, advancing `i` only when one exists.
fn take_value(args: &[String], i: &mut usize) -> Option<String> {
    let value = args.get(*i + 1).cloned();
    if value.is_some() {
        *i += 1;
    }
    value
}

/// Parse CLI flags. Malformed numeric values silently keep the default;
/// unknown flags are ignored (benchmark binaries stay permissive).
fn parse_args() -> BenchConfig {
    let mut config = BenchConfig::default();
    let args: Vec<String> = env::args().collect();

    let mut i = 1;
    while i < args.len() {
        match args[i].as_str() {
            "--seed-commits" => {
                if let Some(value) = take_value(&args, &mut i) {
                    config.seed_commits = value.parse().unwrap_or(config.seed_commits);
                }
            }
            "--backlog-commits" => {
                if let Some(value) = take_value(&args, &mut i) {
                    config.backlog_commits = value.parse().unwrap_or(config.backlog_commits);
                }
            }
            "--max-frames" => {
                if let Some(value) = take_value(&args, &mut i) {
                    config.max_frames = value.parse().unwrap_or(config.max_frames);
                }
            }
            "--sync-mode" => {
                if let Some(value) = take_value(&args, &mut i) {
                    config.sync_mode = match value.to_ascii_lowercase().as_str() {
                        "full" => SyncMode::Full,
                        "off" => SyncMode::Off,
                        _ => SyncMode::Normal,
                    };
                }
            }
            "--segment-max-bytes" => {
                if let Some(value) = take_value(&args, &mut i) {
                    config.segment_max_bytes = value.parse().unwrap_or(config.segment_max_bytes);
                }
            }
            "--retention-min" => {
                if let Some(value) = take_value(&args, &mut i) {
                    config.retention_min_entries =
                        value.parse().unwrap_or(config.retention_min_entries);
                }
            }
            _ => {}
        }
        i += 1;
    }

    // Clamp to sane minimums; retention must cover the whole backlog so
    // catch-up never races the retention sweeper.
    if config.max_frames == 0 {
        config.max_frames = 1;
    }
    if config.backlog_commits == 0 {
        config.backlog_commits = 1;
    }
    config.retention_min_entries = config
        .retention_min_entries
        .max(config.backlog_commits as u64);
    config
}

/// Human-readable label for a sync mode.
fn sync_mode_label(mode: SyncMode) -> &'static str {
    match mode {
        SyncMode::Full => "full",
        SyncMode::Normal => "normal",
        SyncMode::Off => "off",
    }
}

/// Frames per second. When elapsed is effectively zero, return the raw frame
/// count instead of an infinity from the division.
fn throughput(frames: usize, elapsed: Duration) -> f64 {
    if frames == 0 {
        return 0.0;
    }
    let secs = elapsed.as_secs_f64();
    if secs <= f64::EPSILON {
        frames as f64
    } else {
        frames as f64 / secs
    }
}

/// Open the primary with replication sidecar enabled and auto-checkpoint off
/// (checkpointing would interfere with the timed phases).
fn open_primary(
    path: &std::path::Path,
    sidecar: &std::path::Path,
    config: &BenchConfig,
) -> kitedb::Result<SingleFileDB> {
    open_single_file(
        path,
        SingleFileOpenOptions::new()
            .sync_mode(config.sync_mode)
            .auto_checkpoint(false)
            .replication_role(ReplicationRole::Primary)
            .replication_sidecar_path(sidecar)
            .replication_segment_max_bytes(config.segment_max_bytes)
            .replication_retention_min_entries(config.retention_min_entries),
    )
}

/// Open a replica wired to the primary's db file and sidecar.
fn open_replica(
    path: &std::path::Path,
    sidecar: &std::path::Path,
    source_db_path: &std::path::Path,
    source_sidecar: &std::path::Path,
    config: &BenchConfig,
) -> kitedb::Result<SingleFileDB> {
    open_single_file(
        path,
        SingleFileOpenOptions::new()
            .sync_mode(config.sync_mode)
            .auto_checkpoint(false)
            .replication_role(ReplicationRole::Replica)
            .replication_sidecar_path(sidecar)
            .replication_source_db_path(source_db_path)
            .replication_source_sidecar_path(source_sidecar),
    )
}

/// Append `count` single-node commits, keyed `{label}:{offset+i}` so every
/// commit is unique and verifiable on the replica.
fn append_commits(
    db: &SingleFileDB,
    label: &str,
    count: usize,
    offset: usize,
) -> kitedb::Result<()> {
    for i in 0..count {
        db.begin(false)?;
        db.create_node(Some(&format!("{label}:{}", offset + i)))?;
        let _ = db.commit_with_token()?;
    }
    Ok(())
}

/// Drive the benchmark: seed, bootstrap, generate backlog, time catch-up,
/// verify convergence, and print machine-parseable metrics.
fn main() -> kitedb::Result<()> {
    let config = parse_args();
    println!("replication_catchup_bench");
    println!("sync_mode: {}", sync_mode_label(config.sync_mode));
    println!("seed_commits: {}", config.seed_commits);
    println!("backlog_commits: {}", config.backlog_commits);
    println!("max_frames: {}", config.max_frames);

    let dir = tempdir().expect("tempdir");
    let primary_db_path = dir.path().join("bench-primary.kitedb");
    let primary_sidecar = dir.path().join("bench-primary.sidecar");
    let replica_db_path = dir.path().join("bench-replica.kitedb");
    let replica_sidecar = dir.path().join("bench-replica.sidecar");

    let primary = open_primary(&primary_db_path, &primary_sidecar, &config)?;
    append_commits(&primary, "seed", config.seed_commits, 0)?;

    let replica = open_replica(
        &replica_db_path,
        &replica_sidecar,
        &primary_db_path,
        &primary_sidecar,
        &config,
    )?;
    replica.replica_bootstrap_from_snapshot()?;

    // Backlog phase: measure how fast the primary produces frames.
    let produce_start = Instant::now();
    append_commits(
        &primary,
        "backlog",
        config.backlog_commits,
        config.seed_commits,
    )?;
    let produce_elapsed = produce_start.elapsed();
    let _ = primary.primary_run_retention()?;

    // Catch-up phase: pull in max_frames chunks until the replica is current.
    let catchup_start = Instant::now();
    let mut catchup_loops = 0usize;
    let mut applied_frames = 0usize;
    loop {
        let applied = replica.replica_catch_up_once(config.max_frames)?;
        if applied == 0 {
            break;
        }
        applied_frames = applied_frames.saturating_add(applied);
        catchup_loops = catchup_loops.saturating_add(1);
    }
    let catchup_elapsed = catchup_start.elapsed();

    let primary_status = primary
        .primary_replication_status()
        .ok_or_else(|| kitedb::KiteError::InvalidReplication("missing primary status".to_string()))?;
    let replica_status = replica
        .replica_replication_status()
        .ok_or_else(|| kitedb::KiteError::InvalidReplication("missing replica status".to_string()))?;

    // Convergence checks: log position and node count must both match.
    if replica_status.applied_epoch != primary_status.epoch
        || replica_status.applied_log_index != primary_status.head_log_index
    {
        return Err(kitedb::KiteError::InvalidReplication(format!(
            "catch-up mismatch: replica at {}:{}, primary at {}:{}",
            replica_status.applied_epoch,
            replica_status.applied_log_index,
            primary_status.epoch,
            primary_status.head_log_index
        )));
    }

    if replica.count_nodes() != primary.count_nodes() {
        return Err(kitedb::KiteError::InvalidReplication(
            "replica node count mismatch after catch-up".to_string(),
        ));
    }

    let primary_fps = throughput(config.backlog_commits, produce_elapsed);
    let catchup_fps = throughput(applied_frames, catchup_elapsed);
    let throughput_ratio = if primary_fps <= f64::EPSILON {
        0.0
    } else {
        catchup_fps / primary_fps
    };

    println!("applied_frames: {}", applied_frames);
    println!("catchup_loops: {}", catchup_loops);
    println!(
        "produce_elapsed_ms: {:.3}",
        produce_elapsed.as_secs_f64() * 1000.0
    );
    println!(
        "catchup_elapsed_ms: {:.3}",
        catchup_elapsed.as_secs_f64() * 1000.0
    );
    println!("primary_frames_per_sec: {:.2}", primary_fps);
    println!("catchup_frames_per_sec: {:.2}", catchup_fps);
    println!("throughput_ratio: {:.4}", throughput_ratio);
    println!("primary_head_log_index: {}", primary_status.head_log_index);
    println!(
        "replica_applied: {}:{}",
        replica_status.applied_epoch, replica_status.applied_log_index
    );

    close_single_file(replica)?;
    close_single_file(primary)?;
    Ok(())
}
diff --git a/ray-rs/examples/replication_soak_bench.rs b/ray-rs/examples/replication_soak_bench.rs
new file mode 100644
index 0000000..76355d1
--- /dev/null
+++ b/ray-rs/examples/replication_soak_bench.rs
@@ -0,0 +1,615 @@
//! Replication long-run soak benchmark with lag churn, promotion fencing, and reseed recovery.
//!
//! Usage:
//!   cargo run --release --example replication_soak_bench --no-default-features -- [options]
//!
//! Options:
//!   --replicas N                Replica count (default: 5)
//!   --cycles N                  Soak cycles (default: 18)
//!   --commits-per-cycle N       Primary commits per cycle (default: 120)
//!   --active-replicas N         Replicas actively catching up each cycle (default: 3)
//!
--churn-interval N Cycles before rotating active replica window (default: 3) +//! --promotion-interval N Promote primary every N cycles; 0 disables (default: 6) +//! --reseed-check-interval N Probe lagging replicas for reseed every N cycles; 0 disables (default: 3) +//! --max-frames N Max frames per replica pull (default: 128) +//! --recovery-max-loops N Max catch-up loops when recovering lag (default: 80) +//! --segment-max-bytes N Sidecar segment rotation threshold (default: 1) +//! --retention-min N Primary retention min entries (default: 64) +//! --sync-mode MODE Sync mode: full|normal|off (default: normal) + +use std::env; +use std::time::Instant; + +use tempfile::tempdir; + +use kitedb::core::single_file::{ + close_single_file, open_single_file, SingleFileDB, SingleFileOpenOptions, SyncMode, +}; +use kitedb::replication::types::ReplicationRole; + +#[derive(Debug, Clone)] +struct SoakConfig { + replicas: usize, + cycles: usize, + commits_per_cycle: usize, + active_replicas_per_cycle: usize, + churn_interval: usize, + promotion_interval: usize, + reseed_check_interval: usize, + max_frames: usize, + recovery_max_loops: usize, + segment_max_bytes: u64, + retention_min_entries: u64, + sync_mode: SyncMode, +} + +impl Default for SoakConfig { + fn default() -> Self { + Self { + replicas: 5, + cycles: 18, + commits_per_cycle: 120, + active_replicas_per_cycle: 3, + churn_interval: 3, + promotion_interval: 6, + reseed_check_interval: 3, + max_frames: 128, + recovery_max_loops: 80, + segment_max_bytes: 1, + retention_min_entries: 64, + sync_mode: SyncMode::Normal, + } + } +} + +struct ReplicaSlot { + id: String, + db: SingleFileDB, +} + +fn parse_args() -> SoakConfig { + let mut config = SoakConfig::default(); + let args: Vec = env::args().collect(); + + let mut i = 1; + while i < args.len() { + match args[i].as_str() { + "--replicas" => { + if let Some(value) = args.get(i + 1) { + config.replicas = value.parse().unwrap_or(config.replicas); + i += 1; + } + } + 
"--cycles" => { + if let Some(value) = args.get(i + 1) { + config.cycles = value.parse().unwrap_or(config.cycles); + i += 1; + } + } + "--commits-per-cycle" => { + if let Some(value) = args.get(i + 1) { + config.commits_per_cycle = value.parse().unwrap_or(config.commits_per_cycle); + i += 1; + } + } + "--active-replicas" => { + if let Some(value) = args.get(i + 1) { + config.active_replicas_per_cycle = + value.parse().unwrap_or(config.active_replicas_per_cycle); + i += 1; + } + } + "--churn-interval" => { + if let Some(value) = args.get(i + 1) { + config.churn_interval = value.parse().unwrap_or(config.churn_interval); + i += 1; + } + } + "--promotion-interval" => { + if let Some(value) = args.get(i + 1) { + config.promotion_interval = value.parse().unwrap_or(config.promotion_interval); + i += 1; + } + } + "--reseed-check-interval" => { + if let Some(value) = args.get(i + 1) { + config.reseed_check_interval = value.parse().unwrap_or(config.reseed_check_interval); + i += 1; + } + } + "--max-frames" => { + if let Some(value) = args.get(i + 1) { + config.max_frames = value.parse().unwrap_or(config.max_frames); + i += 1; + } + } + "--recovery-max-loops" => { + if let Some(value) = args.get(i + 1) { + config.recovery_max_loops = value.parse().unwrap_or(config.recovery_max_loops); + i += 1; + } + } + "--segment-max-bytes" => { + if let Some(value) = args.get(i + 1) { + config.segment_max_bytes = value.parse().unwrap_or(config.segment_max_bytes); + i += 1; + } + } + "--retention-min" => { + if let Some(value) = args.get(i + 1) { + config.retention_min_entries = value.parse().unwrap_or(config.retention_min_entries); + i += 1; + } + } + "--sync-mode" => { + if let Some(value) = args.get(i + 1) { + config.sync_mode = match value.to_ascii_lowercase().as_str() { + "full" => SyncMode::Full, + "off" => SyncMode::Off, + _ => SyncMode::Normal, + }; + i += 1; + } + } + _ => {} + } + i += 1; + } + + config.replicas = config.replicas.max(1); + config.cycles = config.cycles.max(1); + 
config.commits_per_cycle = config.commits_per_cycle.max(1); + config.active_replicas_per_cycle = config.active_replicas_per_cycle.max(1).min(config.replicas); + config.churn_interval = config.churn_interval.max(1); + config.max_frames = config.max_frames.max(1); + config.recovery_max_loops = config.recovery_max_loops.max(1); + config.segment_max_bytes = config.segment_max_bytes.max(1); + config.retention_min_entries = config.retention_min_entries.max(1); + config +} + +fn sync_mode_label(mode: SyncMode) -> &'static str { + match mode { + SyncMode::Full => "full", + SyncMode::Normal => "normal", + SyncMode::Off => "off", + } +} + +fn open_primary( + path: &std::path::Path, + sidecar: &std::path::Path, + config: &SoakConfig, +) -> kitedb::Result { + open_single_file( + path, + SingleFileOpenOptions::new() + .sync_mode(config.sync_mode) + .auto_checkpoint(false) + .replication_role(ReplicationRole::Primary) + .replication_sidecar_path(sidecar) + .replication_segment_max_bytes(config.segment_max_bytes) + .replication_retention_min_entries(config.retention_min_entries), + ) +} + +fn open_replica( + path: &std::path::Path, + sidecar: &std::path::Path, + source_db: &std::path::Path, + source_sidecar: &std::path::Path, + config: &SoakConfig, +) -> kitedb::Result { + open_single_file( + path, + SingleFileOpenOptions::new() + .sync_mode(config.sync_mode) + .auto_checkpoint(false) + .replication_role(ReplicationRole::Replica) + .replication_sidecar_path(sidecar) + .replication_source_db_path(source_db) + .replication_source_sidecar_path(source_sidecar), + ) +} + +fn primary_status( + db: &SingleFileDB, +) -> kitedb::Result { + db.primary_replication_status().ok_or_else(|| { + kitedb::KiteError::InvalidReplication("missing primary replication status".to_string()) + }) +} + +fn replica_status( + db: &SingleFileDB, +) -> kitedb::Result { + db.replica_replication_status().ok_or_else(|| { + kitedb::KiteError::InvalidReplication("missing replica replication status".to_string()) + 
}) +} + +fn append_cycle_commits( + db: &SingleFileDB, + cycle: usize, + count: usize, + next_id: &mut usize, + expected_keys: &mut Vec, +) -> kitedb::Result<()> { + for _ in 0..count { + let key = format!("soak-{cycle}-{}", *next_id); + db.begin(false)?; + db.create_node(Some(&key))?; + let _ = db.commit_with_token()?.ok_or_else(|| { + kitedb::KiteError::InvalidReplication("primary commit token missing".to_string()) + })?; + expected_keys.push(key); + *next_id = next_id.saturating_add(1); + } + Ok(()) +} + +fn catch_up_to_target( + replica: &SingleFileDB, + target_log_index: u64, + max_frames: usize, + max_loops: usize, +) -> kitedb::Result { + let mut loops = 0usize; + loop { + let status = replica_status(replica)?; + if status.needs_reseed { + return Err(kitedb::KiteError::InvalidReplication( + "replica needs reseed".to_string(), + )); + } + if status.applied_log_index >= target_log_index { + return Ok(loops); + } + if loops >= max_loops { + return Err(kitedb::KiteError::InvalidReplication(format!( + "replica catch-up exceeded max loops ({max_loops})" + ))); + } + + let applied = match replica.replica_catch_up_once(max_frames) { + Ok(applied) => applied, + Err(err) => { + let status = replica_status(replica)?; + if status.needs_reseed || err.to_string().contains("reseed") { + return Err(kitedb::KiteError::InvalidReplication( + "replica needs reseed".to_string(), + )); + } + return Err(err); + } + }; + + loops = loops.saturating_add(1); + if applied == 0 { + let status = replica_status(replica)?; + if status.applied_log_index >= target_log_index { + return Ok(loops); + } + return Err(kitedb::KiteError::InvalidReplication( + "replica catch-up stalled before target".to_string(), + )); + } + } +} + +fn main() -> kitedb::Result<()> { + let config = parse_args(); + println!("replication_soak_bench"); + println!("sync_mode: {}", sync_mode_label(config.sync_mode)); + println!("replicas: {}", config.replicas); + println!("cycles: {}", config.cycles); + 
println!("commits_per_cycle: {}", config.commits_per_cycle); + println!( + "active_replicas_per_cycle: {}", + config.active_replicas_per_cycle + ); + println!("churn_interval: {}", config.churn_interval); + println!("promotion_interval: {}", config.promotion_interval); + println!("reseed_check_interval: {}", config.reseed_check_interval); + println!("max_frames: {}", config.max_frames); + println!("recovery_max_loops: {}", config.recovery_max_loops); + + let started = Instant::now(); + let dir = tempdir().expect("tempdir"); + let primary_db_path = dir.path().join("soak-primary.kitedb"); + let primary_sidecar = dir.path().join("soak-primary.sidecar"); + + let primary = open_primary(&primary_db_path, &primary_sidecar, &config)?; + let mut stale_probe = open_primary(&primary_db_path, &primary_sidecar, &config)?; + + let mut replicas: Vec = Vec::with_capacity(config.replicas); + for idx in 0..config.replicas { + let replica_db_path = dir.path().join(format!("soak-replica-{idx}.kitedb")); + let replica_sidecar = dir.path().join(format!("soak-replica-{idx}.sidecar")); + let replica = open_replica( + &replica_db_path, + &replica_sidecar, + &primary_db_path, + &primary_sidecar, + &config, + )?; + replica.replica_bootstrap_from_snapshot()?; + replicas.push(ReplicaSlot { + id: format!("replica-{idx}"), + db: replica, + }); + } + + let mut expected_keys = + Vec::with_capacity(config.cycles.saturating_mul(config.commits_per_cycle)); + let mut next_id = 0usize; + + let mut writes_committed = 0usize; + let mut promotion_count = 0usize; + let mut stale_fence_rejections = 0usize; + let mut reseed_count = 0usize; + let mut reseed_recovery_successes = 0usize; + let mut max_recovery_loops_seen = 0usize; + let mut max_observed_lag = 0u64; + let divergence_violations = 0usize; + + for cycle in 0..config.cycles { + append_cycle_commits( + &primary, + cycle, + config.commits_per_cycle, + &mut next_id, + &mut expected_keys, + )?; + writes_committed = 
writes_committed.saturating_add(config.commits_per_cycle); + + let head = primary_status(&primary)?; + + let active_start = (cycle / config.churn_interval) % replicas.len(); + let mut active = vec![false; replicas.len()]; + for offset in 0..config.active_replicas_per_cycle { + active[(active_start + offset) % replicas.len()] = true; + } + + for (idx, slot) in replicas.iter_mut().enumerate() { + if !active[idx] { + continue; + } + + let loops = match catch_up_to_target( + &slot.db, + head.head_log_index, + config.max_frames, + config.recovery_max_loops, + ) { + Ok(loops) => loops, + Err(err) => { + let status = replica_status(&slot.db)?; + if status.needs_reseed || err.to_string().contains("reseed") { + reseed_count = reseed_count.saturating_add(1); + primary.checkpoint()?; + slot.db.replica_reseed_from_snapshot()?; + reseed_recovery_successes = reseed_recovery_successes.saturating_add(1); + catch_up_to_target( + &slot.db, + head.head_log_index, + config.max_frames, + config.recovery_max_loops, + )? 
+ } else { + return Err(err); + } + } + }; + max_recovery_loops_seen = max_recovery_loops_seen.max(loops); + + let status = replica_status(&slot.db)?; + primary.primary_report_replica_progress( + &slot.id, + status.applied_epoch, + status.applied_log_index, + )?; + } + + let _ = primary.primary_run_retention()?; + + let should_probe_reseed = + config.reseed_check_interval > 0 && (cycle + 1) % config.reseed_check_interval == 0; + if should_probe_reseed { + let head = primary_status(&primary)?; + for (idx, slot) in replicas.iter_mut().enumerate() { + if active[idx] { + continue; + } + + match slot.db.replica_catch_up_once(config.max_frames) { + Ok(_) => {} + Err(err) => { + let status = replica_status(&slot.db)?; + if status.needs_reseed || err.to_string().contains("reseed") { + reseed_count = reseed_count.saturating_add(1); + primary.checkpoint()?; + slot.db.replica_reseed_from_snapshot()?; + reseed_recovery_successes = reseed_recovery_successes.saturating_add(1); + let loops = catch_up_to_target( + &slot.db, + head.head_log_index, + config.max_frames, + config.recovery_max_loops, + )?; + max_recovery_loops_seen = max_recovery_loops_seen.max(loops); + let status = replica_status(&slot.db)?; + primary.primary_report_replica_progress( + &slot.id, + status.applied_epoch, + status.applied_log_index, + )?; + } else { + return Err(err); + } + } + } + } + } + + let head = primary_status(&primary)?; + for slot in &replicas { + let status = replica_status(&slot.db)?; + let lag = head.head_log_index.saturating_sub(status.applied_log_index); + max_observed_lag = max_observed_lag.max(lag); + } + + if config.promotion_interval > 0 && (cycle + 1) % config.promotion_interval == 0 { + // Before epoch change, force all replicas to converge to current head. + // This keeps promotion checks deterministic under churn and retention pressure. 
+ let head_before_promotion = primary_status(&primary)?; + for slot in &mut replicas { + let loops = match catch_up_to_target( + &slot.db, + head_before_promotion.head_log_index, + config.max_frames, + config.recovery_max_loops, + ) { + Ok(loops) => loops, + Err(err) => { + let status = replica_status(&slot.db)?; + if status.needs_reseed || err.to_string().contains("reseed") { + reseed_count = reseed_count.saturating_add(1); + primary.checkpoint()?; + slot.db.replica_reseed_from_snapshot()?; + reseed_recovery_successes = reseed_recovery_successes.saturating_add(1); + catch_up_to_target( + &slot.db, + head_before_promotion.head_log_index, + config.max_frames, + config.recovery_max_loops, + )? + } else { + return Err(err); + } + } + }; + max_recovery_loops_seen = max_recovery_loops_seen.max(loops); + + let status = replica_status(&slot.db)?; + primary.primary_report_replica_progress( + &slot.id, + status.applied_epoch, + status.applied_log_index, + )?; + } + + let _ = primary.primary_promote_to_next_epoch()?; + promotion_count = promotion_count.saturating_add(1); + + // Force stale handle manifest refresh before write probe so fencing is deterministic. 
+ let _ = stale_probe.primary_run_retention(); + + stale_probe.begin(false)?; + stale_probe.create_node(Some(&format!("stale-probe-{cycle}")))?; + match stale_probe.commit_with_token() { + Ok(_) => { + return Err(kitedb::KiteError::InvalidReplication( + "stale writer unexpectedly committed after promotion".to_string(), + )); + } + Err(err) => { + if err.to_string().contains("stale primary") { + stale_fence_rejections = stale_fence_rejections.saturating_add(1); + } else { + return Err(err); + } + } + } + + let _ = stale_probe.rollback(); + close_single_file(stale_probe)?; + stale_probe = open_primary(&primary_db_path, &primary_sidecar, &config)?; + } + + if cycle % 3 == 0 || cycle + 1 == config.cycles { + println!( + "progress_cycle: {} primary_epoch: {} primary_head_log_index: {} reseeds: {} promotions: {}", + cycle + 1, + head.epoch, + head.head_log_index, + reseed_count, + promotion_count + ); + } + } + + let final_head = primary_status(&primary)?; + for slot in &mut replicas { + let loops = match catch_up_to_target( + &slot.db, + final_head.head_log_index, + config.max_frames, + config.recovery_max_loops, + ) { + Ok(loops) => loops, + Err(err) => { + let status = replica_status(&slot.db)?; + if status.needs_reseed || err.to_string().contains("reseed") { + reseed_count = reseed_count.saturating_add(1); + primary.checkpoint()?; + slot.db.replica_reseed_from_snapshot()?; + reseed_recovery_successes = reseed_recovery_successes.saturating_add(1); + catch_up_to_target( + &slot.db, + final_head.head_log_index, + config.max_frames, + config.recovery_max_loops, + )? 
+ } else { + return Err(err); + } + } + }; + max_recovery_loops_seen = max_recovery_loops_seen.max(loops); + + if slot.db.count_nodes() != primary.count_nodes() { + return Err(kitedb::KiteError::InvalidReplication(format!( + "node-count divergence on {}: replica={} primary={}", + slot.id, + slot.db.count_nodes(), + primary.count_nodes() + ))); + } + + for key in &expected_keys { + if slot.db.node_by_key(key).is_none() { + return Err(kitedb::KiteError::InvalidReplication(format!( + "missing key on {}: {key}", + slot.id + ))); + } + } + } + + let elapsed_ms = started.elapsed().as_secs_f64() * 1000.0; + let final_head = primary_status(&primary)?; + + println!("writes_committed: {}", writes_committed); + println!("promotion_count: {}", promotion_count); + println!("stale_fence_rejections: {}", stale_fence_rejections); + println!("reseed_count: {}", reseed_count); + println!("reseed_recovery_successes: {}", reseed_recovery_successes); + println!("max_recovery_loops: {}", max_recovery_loops_seen); + println!("max_observed_lag: {}", max_observed_lag); + println!("divergence_violations: {}", divergence_violations); + println!("final_primary_epoch: {}", final_head.epoch); + println!( + "final_primary_head_log_index: {}", + final_head.head_log_index + ); + println!("final_primary_nodes: {}", primary.count_nodes()); + println!("elapsed_ms: {:.3}", elapsed_ms); + + for slot in replicas { + close_single_file(slot.db)?; + } + close_single_file(stale_probe)?; + close_single_file(primary)?; + Ok(()) +} diff --git a/ray-rs/examples/single_file_raw_bench.rs b/ray-rs/examples/single_file_raw_bench.rs index dc53bde..0f9791d 100644 --- a/ray-rs/examples/single_file_raw_bench.rs +++ b/ray-rs/examples/single_file_raw_bench.rs @@ -17,6 +17,8 @@ //! --no-auto-checkpoint Disable auto-checkpoint //! --vector-dims N Vector dimensions (default: 128) //! --vector-count N Number of vectors to set (default: 1000) +//! --replication-primary Enable primary replication sidecar on open options +//! 
--replication-segment-max-bytes BYTES Primary segment rotation threshold when replication is enabled //! --keep-db Keep the database file after benchmark use rand::{rngs::StdRng, Rng, SeedableRng}; @@ -27,6 +29,7 @@ use tempfile::tempdir; use kitedb::core::single_file::{ close_single_file, open_single_file, SingleFileOpenOptions, SyncMode, }; +use kitedb::replication::types::ReplicationRole; use kitedb::types::PropValue; #[derive(Debug, Clone)] @@ -44,6 +47,8 @@ struct BenchConfig { auto_checkpoint: bool, vector_dims: usize, vector_count: usize, + replication_primary: bool, + replication_segment_max_bytes: Option, keep_db: bool, skip_checkpoint: bool, reopen_readonly: bool, @@ -65,6 +70,8 @@ impl Default for BenchConfig { auto_checkpoint: true, vector_dims: 128, vector_count: 1000, + replication_primary: false, + replication_segment_max_bytes: None, keep_db: false, skip_checkpoint: false, reopen_readonly: false, @@ -155,6 +162,16 @@ fn parse_args() -> BenchConfig { i += 1; } } + "--replication-primary" => { + config.replication_primary = true; + } + "--replication-segment-max-bytes" => { + if let Some(value) = args.get(i + 1) { + config.replication_segment_max_bytes = + value.parse().ok().filter(|parsed: &u64| *parsed > 0); + i += 1; + } + } "--skip-checkpoint" => { config.skip_checkpoint = true; } @@ -648,6 +665,13 @@ fn main() { println!("Checkpoint threshold: {}", config.checkpoint_threshold); println!("Vector dims: {}", format_number(config.vector_dims)); println!("Vector count: {}", format_number(config.vector_count)); + println!("Replication primary: {}", config.replication_primary); + if let Some(bytes) = config.replication_segment_max_bytes { + println!( + "Replication segment max bytes: {}", + format_number(bytes as usize) + ); + } println!("Skip checkpoint: {}", config.skip_checkpoint); println!("Reopen read-only: {}", config.reopen_readonly); println!("{}", "=".repeat(120)); @@ -666,6 +690,12 @@ fn main() { .group_commit_enabled(true) 
.group_commit_window_ms(config.group_commit_window_ms); } + if config.replication_primary { + options = options.replication_role(ReplicationRole::Primary); + if let Some(max_bytes) = config.replication_segment_max_bytes { + options = options.replication_segment_max_bytes(max_bytes); + } + } let mut db = open_single_file(&db_path, options).expect("failed to open single-file db"); diff --git a/ray-rs/examples/vector_ann_bench.rs b/ray-rs/examples/vector_ann_bench.rs new file mode 100644 index 0000000..1259601 --- /dev/null +++ b/ray-rs/examples/vector_ann_bench.rs @@ -0,0 +1,402 @@ +//! ANN algorithm benchmark (IVF vs IVF-PQ) +//! +//! Usage: +//! cargo run --release --example vector_ann_bench --no-default-features -- [options] +//! +//! Options: +//! --algorithm ivf|ivf_pq Algorithm to benchmark (default: ivf_pq) +//! --vectors N Number of vectors (default: 20000) +//! --dimensions D Vector dimensions (default: 384) +//! --queries N Query count (default: 200) +//! --k N Top-k (default: 10) +//! --n-clusters N IVF clusters (default: sqrt(vectors) clamped to [16,1024]) +//! --n-probe N Probe count (default: 10) +//! --pq-subspaces N PQ subspaces for IVF-PQ (default: 48) +//! --pq-centroids N PQ centroids per subspace (default: 256) +//! --residuals true|false Use residual encoding for IVF-PQ (default: true) +//! 
--seed N RNG seed (default: 42) + +use kitedb::types::NodeId; +use kitedb::vector::{ + create_vector_store, normalize, vector_store_all_vectors, vector_store_insert, + vector_store_vector_by_id, DistanceMetric, IvfConfig, IvfIndex, IvfPqConfig, IvfPqIndex, + IvfPqSearchOptions, SearchOptions, VectorManifest, VectorSearchResult, VectorStoreConfig, +}; +use rand::{rngs::StdRng, Rng, SeedableRng}; +use std::cmp::Ordering; +use std::collections::HashSet; +use std::env; +use std::time::Instant; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum Algorithm { + Ivf, + IvfPq, +} + +impl Algorithm { + fn parse(raw: &str) -> Option { + match raw.trim().to_lowercase().as_str() { + "ivf" => Some(Self::Ivf), + "ivf_pq" => Some(Self::IvfPq), + _ => None, + } + } + + fn as_str(&self) -> &'static str { + match self { + Self::Ivf => "ivf", + Self::IvfPq => "ivf_pq", + } + } +} + +#[derive(Debug, Clone)] +struct BenchConfig { + algorithm: Algorithm, + vectors: usize, + dimensions: usize, + queries: usize, + k: usize, + n_clusters: Option, + n_probe: usize, + pq_subspaces: usize, + pq_centroids: usize, + residuals: bool, + seed: u64, +} + +impl Default for BenchConfig { + fn default() -> Self { + Self { + algorithm: Algorithm::IvfPq, + vectors: 20_000, + dimensions: 384, + queries: 200, + k: 10, + n_clusters: None, + n_probe: 10, + pq_subspaces: 48, + pq_centroids: 256, + residuals: false, + seed: 42, + } + } +} + +fn parse_args() -> BenchConfig { + let mut config = BenchConfig::default(); + let args: Vec = env::args().collect(); + let mut i = 1usize; + + while i < args.len() { + match args[i].as_str() { + "--algorithm" => { + if let Some(value) = args.get(i + 1) { + if let Some(parsed) = Algorithm::parse(value) { + config.algorithm = parsed; + } + i += 1; + } + } + "--vectors" => { + if let Some(value) = args.get(i + 1) { + config.vectors = value.parse().unwrap_or(config.vectors); + i += 1; + } + } + "--dimensions" => { + if let Some(value) = args.get(i + 1) { + config.dimensions 
= value.parse().unwrap_or(config.dimensions); + i += 1; + } + } + "--queries" => { + if let Some(value) = args.get(i + 1) { + config.queries = value.parse().unwrap_or(config.queries); + i += 1; + } + } + "--k" => { + if let Some(value) = args.get(i + 1) { + config.k = value.parse().unwrap_or(config.k); + i += 1; + } + } + "--n-clusters" => { + if let Some(value) = args.get(i + 1) { + config.n_clusters = value.parse::().ok(); + i += 1; + } + } + "--n-probe" => { + if let Some(value) = args.get(i + 1) { + config.n_probe = value.parse().unwrap_or(config.n_probe); + i += 1; + } + } + "--pq-subspaces" => { + if let Some(value) = args.get(i + 1) { + config.pq_subspaces = value.parse().unwrap_or(config.pq_subspaces); + i += 1; + } + } + "--pq-centroids" => { + if let Some(value) = args.get(i + 1) { + config.pq_centroids = value.parse().unwrap_or(config.pq_centroids); + i += 1; + } + } + "--residuals" => { + if let Some(value) = args.get(i + 1) { + config.residuals = matches!( + value.trim().to_ascii_lowercase().as_str(), + "1" | "true" | "yes" + ); + i += 1; + } + } + "--seed" => { + if let Some(value) = args.get(i + 1) { + config.seed = value.parse().unwrap_or(config.seed); + i += 1; + } + } + _ => {} + } + i += 1; + } + + config.vectors = config.vectors.max(1); + config.dimensions = config.dimensions.max(1); + config.queries = config.queries.max(1); + config.k = config.k.max(1).min(config.vectors); + config.n_probe = config.n_probe.max(1); + config.pq_subspaces = config.pq_subspaces.max(1); + config.pq_centroids = config.pq_centroids.max(2); + config +} + +fn random_vector(rng: &mut StdRng, dimensions: usize) -> Vec { + let mut vector = vec![0.0f32; dimensions]; + for value in &mut vector { + *value = rng.gen_range(-1.0f32..1.0f32); + } + vector +} + +fn percentile(sorted: &[u128], ratio: f64) -> u128 { + if sorted.is_empty() { + return 0; + } + let idx = ((sorted.len() as f64) * ratio) + .floor() + .min((sorted.len() - 1) as f64) as usize; + sorted[idx] +} + +fn 
exact_top_k( + manifest: &VectorManifest, + query: &[f32], + k: usize, + metric: DistanceMetric, +) -> Vec { + let query_prepared = if metric == DistanceMetric::Cosine { + normalize(query) + } else { + query.to_vec() + }; + let distance = metric.distance_fn(); + let mut candidates: Vec<(u64, f32)> = Vec::with_capacity(manifest.node_to_vector.len()); + + for &vector_id in manifest.node_to_vector.values() { + if let Some(vector) = vector_store_vector_by_id(manifest, vector_id) { + candidates.push((vector_id, distance(&query_prepared, vector))); + } + } + + candidates.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(Ordering::Equal)); + candidates.into_iter().take(k).map(|(id, _)| id).collect() +} + +fn recall_at_k(approx: &[VectorSearchResult], exact_ids: &[u64], k: usize) -> f64 { + if k == 0 { + return 1.0; + } + let exact: HashSet = exact_ids.iter().copied().collect(); + let hits = approx + .iter() + .take(k) + .filter(|result| exact.contains(&result.vector_id)) + .count(); + hits as f64 / k as f64 +} + +fn choose_n_clusters(config: &BenchConfig) -> usize { + config + .n_clusters + .unwrap_or_else(|| (config.vectors as f64).sqrt() as usize) + .clamp(16, 1024) +} + +fn run_ivf_bench( + config: &BenchConfig, + manifest: &VectorManifest, + vector_ids: &[u64], + training_data: &[f32], + queries: &[Vec], +) -> Result<(f64, u128, u128, f64), String> { + let n_clusters = choose_n_clusters(config); + let ivf_config = IvfConfig::new(n_clusters) + .with_n_probe(config.n_probe) + .with_metric(DistanceMetric::Cosine); + let mut index = IvfIndex::new(config.dimensions, ivf_config); + + let build_start = Instant::now(); + index + .add_training_vectors(training_data, vector_ids.len()) + .map_err(|err| err.to_string())?; + index.train().map_err(|err| err.to_string())?; + for &vector_id in vector_ids { + let vector = vector_store_vector_by_id(manifest, vector_id) + .ok_or_else(|| format!("missing vector {vector_id}"))?; + index + .insert(vector_id, vector) + .map_err(|err| 
err.to_string())?; + } + let build_elapsed_ms = build_start.elapsed().as_millis() as f64; + + let mut latency_ns: Vec = Vec::with_capacity(queries.len()); + let mut recall_sum = 0.0f64; + for query in queries { + let exact = exact_top_k(manifest, query, config.k, DistanceMetric::Cosine); + let start = Instant::now(); + let approx = index.search( + manifest, + query, + config.k, + Some(SearchOptions { + n_probe: Some(config.n_probe), + filter: None, + threshold: None, + }), + ); + latency_ns.push(start.elapsed().as_nanos()); + recall_sum += recall_at_k(&approx, &exact, config.k); + } + latency_ns.sort_unstable(); + let p50 = percentile(&latency_ns, 0.50); + let p95 = percentile(&latency_ns, 0.95); + let mean_recall = recall_sum / queries.len() as f64; + + Ok((build_elapsed_ms, p50, p95, mean_recall)) +} + +fn run_ivf_pq_bench( + config: &BenchConfig, + manifest: &VectorManifest, + vector_ids: &[u64], + training_data: &[f32], + queries: &[Vec], +) -> Result<(f64, u128, u128, f64), String> { + let n_clusters = choose_n_clusters(config); + let ivf_pq_config = IvfPqConfig::new() + .with_n_clusters(n_clusters) + .with_n_probe(config.n_probe) + .with_metric(DistanceMetric::Cosine) + .with_num_subspaces(config.pq_subspaces) + .with_num_centroids(config.pq_centroids) + .with_residuals(config.residuals); + let mut index = + IvfPqIndex::new(config.dimensions, ivf_pq_config).map_err(|err| err.to_string())?; + + let build_start = Instant::now(); + index + .add_training_vectors(training_data, vector_ids.len()) + .map_err(|err| err.to_string())?; + index.train().map_err(|err| err.to_string())?; + for &vector_id in vector_ids { + let vector = vector_store_vector_by_id(manifest, vector_id) + .ok_or_else(|| format!("missing vector {vector_id}"))?; + index + .insert(vector_id, vector) + .map_err(|err| err.to_string())?; + } + let build_elapsed_ms = build_start.elapsed().as_millis() as f64; + + let mut latency_ns: Vec = Vec::with_capacity(queries.len()); + let mut recall_sum = 0.0f64; 
+ for query in queries { + let exact = exact_top_k(manifest, query, config.k, DistanceMetric::Cosine); + let start = Instant::now(); + let approx = index.search( + manifest, + query, + config.k, + Some(IvfPqSearchOptions { + n_probe: Some(config.n_probe), + filter: None, + threshold: None, + }), + ); + latency_ns.push(start.elapsed().as_nanos()); + recall_sum += recall_at_k(&approx, &exact, config.k); + } + latency_ns.sort_unstable(); + let p50 = percentile(&latency_ns, 0.50); + let p95 = percentile(&latency_ns, 0.95); + let mean_recall = recall_sum / queries.len() as f64; + + Ok((build_elapsed_ms, p50, p95, mean_recall)) +} + +fn main() { + let config = parse_args(); + let n_clusters = choose_n_clusters(&config); + let mut rng = StdRng::seed_from_u64(config.seed); + + let store_config = VectorStoreConfig::new(config.dimensions) + .with_metric(DistanceMetric::Cosine) + .with_normalize(true); + let mut manifest = create_vector_store(store_config); + for node_id in 0..config.vectors { + let vector = random_vector(&mut rng, config.dimensions); + vector_store_insert(&mut manifest, node_id as NodeId, &vector).expect("insert failed"); + } + + let (training_data, _node_ids, vector_ids) = vector_store_all_vectors(&manifest); + let mut query_rng = StdRng::seed_from_u64(config.seed ^ 0xA5A5_5A5A_55AA_AA55); + let queries: Vec> = (0..config.queries) + .map(|_| random_vector(&mut query_rng, config.dimensions)) + .collect(); + + let result = match config.algorithm { + Algorithm::Ivf => run_ivf_bench(&config, &manifest, &vector_ids, &training_data, &queries), + Algorithm::IvfPq => run_ivf_pq_bench(&config, &manifest, &vector_ids, &training_data, &queries), + }; + + match result { + Ok((build_ms, p50_ns, p95_ns, mean_recall)) => { + println!("algorithm: {}", config.algorithm.as_str()); + println!("vectors: {}", config.vectors); + println!("dimensions: {}", config.dimensions); + println!("queries: {}", config.queries); + println!("k: {}", config.k); + println!("n_clusters: {}", 
n_clusters); + println!("n_probe: {}", config.n_probe); + if config.algorithm == Algorithm::IvfPq { + println!("pq_subspaces: {}", config.pq_subspaces); + println!("pq_centroids: {}", config.pq_centroids); + println!("residuals: {}", config.residuals); + } + println!("build_elapsed_ms: {:.3}", build_ms); + println!("search_p50_ms: {:.6}", p50_ns as f64 / 1_000_000.0); + println!("search_p95_ms: {:.6}", p95_ns as f64 / 1_000_000.0); + println!("mean_recall_at_k: {:.6}", mean_recall); + } + Err(err) => { + eprintln!("benchmark_failed: {err}"); + std::process::exit(1); + } + } +} diff --git a/ray-rs/examples/vector_compaction_bench.rs b/ray-rs/examples/vector_compaction_bench.rs new file mode 100644 index 0000000..631f347 --- /dev/null +++ b/ray-rs/examples/vector_compaction_bench.rs @@ -0,0 +1,282 @@ +//! Vector Compaction Strategy Benchmark (Rust) +//! +//! Evaluates vector fragment compaction behavior for a given workload shape. +//! +//! Usage: +//! cargo run --release --example vector_compaction_bench --no-default-features -- [options] +//! +//! Options: +//! --vectors N Number of vectors to insert (default: 50000) +//! --dimensions D Vector dimensions (default: 384) +//! --fragment-target-size N Vectors per fragment before seal (default: 5000) +//! --delete-ratio R Ratio of vectors to delete [0..1] (default: 0.35) +//! --min-deletion-ratio R Compaction min deletion ratio (default: 0.30) +//! --max-fragments N Max fragments per compaction run (default: 4) +//! --min-vectors-to-compact N Min live vectors required for compaction (default: 10000) +//! 
--seed N RNG seed (default: 42) + +use kitedb::types::NodeId; +use kitedb::vector::compaction::{ + clear_deleted_fragments, compaction_stats, find_fragments_to_compact, run_compaction_if_needed, + CompactionStrategy, +}; +use kitedb::vector::{ + create_vector_store, vector_store_delete, vector_store_insert, vector_store_seal_active, + vector_store_stats, DistanceMetric, VectorStoreConfig, +}; +use rand::{rngs::StdRng, seq::SliceRandom, Rng, SeedableRng}; +use std::env; +use std::time::Instant; + +#[derive(Debug, Clone)] +struct BenchConfig { + vectors: usize, + dimensions: usize, + fragment_target_size: usize, + delete_ratio: f32, + strategy: CompactionStrategy, + seed: u64, +} + +impl Default for BenchConfig { + fn default() -> Self { + Self { + vectors: 50_000, + dimensions: 384, + fragment_target_size: 5_000, + delete_ratio: 0.35, + strategy: CompactionStrategy::default(), + seed: 42, + } + } +} + +fn parse_args() -> BenchConfig { + let mut config = BenchConfig::default(); + let args: Vec = env::args().collect(); + let mut i = 1usize; + + while i < args.len() { + match args[i].as_str() { + "--vectors" => { + if let Some(value) = args.get(i + 1) { + config.vectors = value.parse().unwrap_or(config.vectors); + i += 1; + } + } + "--dimensions" => { + if let Some(value) = args.get(i + 1) { + config.dimensions = value.parse().unwrap_or(config.dimensions); + i += 1; + } + } + "--fragment-target-size" => { + if let Some(value) = args.get(i + 1) { + config.fragment_target_size = value.parse().unwrap_or(config.fragment_target_size); + i += 1; + } + } + "--delete-ratio" => { + if let Some(value) = args.get(i + 1) { + config.delete_ratio = value.parse().unwrap_or(config.delete_ratio); + i += 1; + } + } + "--min-deletion-ratio" => { + if let Some(value) = args.get(i + 1) { + config.strategy.min_deletion_ratio = + value.parse().unwrap_or(config.strategy.min_deletion_ratio); + i += 1; + } + } + "--max-fragments" => { + if let Some(value) = args.get(i + 1) { + 
config.strategy.max_fragments_per_compaction = value + .parse() + .unwrap_or(config.strategy.max_fragments_per_compaction); + i += 1; + } + } + "--min-vectors-to-compact" => { + if let Some(value) = args.get(i + 1) { + config.strategy.min_vectors_to_compact = value + .parse() + .unwrap_or(config.strategy.min_vectors_to_compact); + i += 1; + } + } + "--seed" => { + if let Some(value) = args.get(i + 1) { + config.seed = value.parse().unwrap_or(config.seed); + i += 1; + } + } + _ => {} + } + i += 1; + } + + config.delete_ratio = config.delete_ratio.clamp(0.0, 1.0); + config.vectors = config.vectors.max(1); + config.dimensions = config.dimensions.max(1); + config.fragment_target_size = config.fragment_target_size.max(1); + config.strategy.max_fragments_per_compaction = + config.strategy.max_fragments_per_compaction.max(1); + + config +} + +fn random_vector(rng: &mut StdRng, dims: usize) -> Vec { + let mut vector = vec![0.0f32; dims]; + for value in &mut vector { + *value = rng.gen_range(-1.0f32..1.0f32); + } + vector +} + +fn format_number(n: usize) -> String { + let mut s = n.to_string(); + let mut i = s.len() as isize - 3; + while i > 0 { + s.insert(i as usize, ','); + i -= 3; + } + s +} + +fn format_ratio(ratio: f32) -> String { + format!("{:.2}%", ratio * 100.0) +} + +fn main() { + let config = parse_args(); + let mut rng = StdRng::seed_from_u64(config.seed); + + println!("{}", "=".repeat(100)); + println!("Vector Compaction Strategy Benchmark (Rust)"); + println!("{}", "=".repeat(100)); + println!("vectors: {}", format_number(config.vectors)); + println!("dimensions: {}", config.dimensions); + println!( + "fragment_target_size: {}", + format_number(config.fragment_target_size) + ); + println!("delete_ratio: {}", format_ratio(config.delete_ratio)); + println!( + "strategy: min_deletion_ratio={}, max_fragments={}, min_vectors_to_compact={}", + config.strategy.min_deletion_ratio, + config.strategy.max_fragments_per_compaction, + 
format_number(config.strategy.min_vectors_to_compact) + ); + println!("{}", "=".repeat(100)); + + let store_config = VectorStoreConfig::new(config.dimensions) + .with_metric(DistanceMetric::Cosine) + .with_fragment_target_size(config.fragment_target_size); + let mut manifest = create_vector_store(store_config); + + let insert_start = Instant::now(); + for node_id in 0..config.vectors { + let vector = random_vector(&mut rng, config.dimensions); + vector_store_insert(&mut manifest, node_id as NodeId, &vector).expect("vector insert failed"); + } + vector_store_seal_active(&mut manifest); + let insert_elapsed = insert_start.elapsed(); + + let mut ids: Vec = (0..config.vectors).collect(); + ids.shuffle(&mut rng); + let delete_count = ((config.vectors as f32) * config.delete_ratio).round() as usize; + let delete_start = Instant::now(); + let mut deleted = 0usize; + for node_id in ids.iter().take(delete_count) { + if vector_store_delete(&mut manifest, *node_id as NodeId) { + deleted += 1; + } + } + let delete_elapsed = delete_start.elapsed(); + + let before_store = vector_store_stats(&manifest); + let before_compaction = compaction_stats(&manifest); + let candidate_ids = find_fragments_to_compact(&manifest, &config.strategy); + + let clear_start = Instant::now(); + let cleared_fragments = clear_deleted_fragments(&mut manifest); + let clear_elapsed = clear_start.elapsed(); + + let compact_start = Instant::now(); + let compacted = run_compaction_if_needed(&mut manifest, &config.strategy); + let compact_elapsed = compact_start.elapsed(); + + let after_store = vector_store_stats(&manifest); + let after_compaction = compaction_stats(&manifest); + + println!( + "insert_elapsed_ms: {:.2}", + insert_elapsed.as_secs_f64() * 1000.0 + ); + println!( + "insert_throughput_vectors_per_sec: {}", + format_number((config.vectors as f64 / insert_elapsed.as_secs_f64()).round() as usize) + ); + println!( + "delete_elapsed_ms: {:.2}", + delete_elapsed.as_secs_f64() * 1000.0 + ); + println!( + 
"deleted_vectors: {} (requested {})", + format_number(deleted), + format_number(delete_count) + ); + println!( + "clear_deleted_elapsed_ms: {:.2}", + clear_elapsed.as_secs_f64() * 1000.0 + ); + println!("cleared_fragments: {}", cleared_fragments); + println!( + "compaction_elapsed_ms: {:.2}", + compact_elapsed.as_secs_f64() * 1000.0 + ); + println!("compaction_performed: {}", compacted); + println!( + "candidate_fragments_before: {} ({:?})", + candidate_ids.len(), + candidate_ids + ); + + println!("\nStore stats (before -> after):"); + println!( + " live_vectors: {} -> {}", + format_number(before_store.live_vectors), + format_number(after_store.live_vectors) + ); + println!( + " total_deleted: {} -> {}", + format_number(before_store.total_deleted), + format_number(after_store.total_deleted) + ); + println!( + " fragment_count: {} -> {}", + before_store.fragment_count, after_store.fragment_count + ); + println!( + " bytes_used: {} -> {}", + format_number(before_store.bytes_used), + format_number(after_store.bytes_used) + ); + + println!("\nCompaction stats (before -> after):"); + println!( + " fragments_needing_compaction: {} -> {}", + before_compaction.fragments_needing_compaction, after_compaction.fragments_needing_compaction + ); + println!( + " total_deleted_vectors: {} -> {}", + format_number(before_compaction.total_deleted_vectors), + format_number(after_compaction.total_deleted_vectors) + ); + println!( + " average_deletion_ratio: {} -> {}", + format_ratio(before_compaction.average_deletion_ratio), + format_ratio(after_compaction.average_deletion_ratio) + ); +} diff --git a/ray-rs/index.d.ts b/ray-rs/index.d.ts index 45f1dec..560ea06 100644 --- a/ray-rs/index.d.ts +++ b/ray-rs/index.d.ts @@ -6,6 +6,8 @@ export declare class Database { static open(path: string, options?: OpenOptions | undefined | null): Database /** Close the database */ close(): void + /** Close the database and run a blocking checkpoint if WAL usage is above threshold. 
*/ + closeWithCheckpointIfWalOver(threshold: number): void /** Check if database is open */ get isOpen(): boolean /** Get database path */ @@ -864,6 +866,64 @@ export interface CheckResult { export declare function collectMetrics(db: Database): DatabaseMetrics +export declare function collectReplicationLogTransportJson(db: Database, cursor?: string | undefined | null, maxFrames?: number | undefined | null, maxBytes?: number | undefined | null, includePayload?: boolean | undefined | null): string + +export declare function collectReplicationMetricsOtelJson(db: Database): string + +export declare function collectReplicationMetricsOtelProtobuf(db: Database): Buffer + +export declare function collectReplicationMetricsPrometheus(db: Database): string + +export declare function collectReplicationSnapshotTransportJson(db: Database, includeData?: boolean | undefined | null): string + +export interface OtlpHttpExportResult { + statusCode: number + responseBody: string +} + +export declare function pushReplicationMetricsOtelJson(db: Database, endpoint: string, timeoutMs: number, bearerToken?: string | undefined | null): OtlpHttpExportResult + +export interface PushReplicationMetricsOtelOptions { + timeoutMs?: number + bearerToken?: string + retryMaxAttempts?: number + retryBackoffMs?: number + retryBackoffMaxMs?: number + retryJitterRatio?: number + adaptiveRetry?: boolean + adaptiveRetryMode?: 'linear' | 'ewma' + adaptiveRetryEwmaAlpha?: number + circuitBreakerFailureThreshold?: number + circuitBreakerOpenMs?: number + circuitBreakerHalfOpenProbes?: number + circuitBreakerStatePath?: string + circuitBreakerStateUrl?: string + circuitBreakerStatePatch?: boolean + circuitBreakerStatePatchBatch?: boolean + circuitBreakerStatePatchBatchMaxKeys?: number + circuitBreakerStatePatchMerge?: boolean + circuitBreakerStatePatchMergeMaxKeys?: number + circuitBreakerStatePatchRetryMaxAttempts?: number + circuitBreakerStateCas?: boolean + circuitBreakerStateLeaseId?: string + 
circuitBreakerScopeKey?: string + compressionGzip?: boolean + httpsOnly?: boolean + caCertPemPath?: string + clientCertPemPath?: string + clientKeyPemPath?: string +} + +export declare function pushReplicationMetricsOtelJsonWithOptions(db: Database, endpoint: string, options?: PushReplicationMetricsOtelOptions | undefined | null): OtlpHttpExportResult + +export declare function pushReplicationMetricsOtelProtobuf(db: Database, endpoint: string, timeoutMs: number, bearerToken?: string | undefined | null): OtlpHttpExportResult + +export declare function pushReplicationMetricsOtelProtobufWithOptions(db: Database, endpoint: string, options?: PushReplicationMetricsOtelOptions | undefined | null): OtlpHttpExportResult + +export declare function pushReplicationMetricsOtelGrpc(db: Database, endpoint: string, timeoutMs: number, bearerToken?: string | undefined | null): OtlpHttpExportResult + +export declare function pushReplicationMetricsOtelGrpcWithOptions(db: Database, endpoint: string, options?: PushReplicationMetricsOtelOptions | undefined | null): OtlpHttpExportResult + /** Compression options */ export interface CompressionOptions { /** Enable compression (default false) */ @@ -1144,6 +1204,8 @@ export interface JsKiteOptions { walSizeMb?: number /** WAL usage threshold (0.0-1.0) to trigger auto-checkpoint */ checkpointThreshold?: number + /** On close, checkpoint if WAL usage is at or above this threshold (default: 0.2) */ + closeCheckpointIfWalUsageAtLeast?: number } /** Node property key-value pair for JS */ @@ -1394,6 +1456,23 @@ export interface OfflineBackupOptions { /** Open a database file (standalone function) */ export declare function openDatabase(path: string, options?: OpenOptions | undefined | null): Database +/** Recommended conservative profile (durability-first). */ +export declare function recommendedSafeProfile(): RuntimeProfile + +/** Recommended balanced profile (good throughput + durability tradeoff). 
*/ +export declare function recommendedBalancedProfile(): RuntimeProfile + +/** Recommended profile for reopen-heavy workloads. */ +export declare function recommendedReopenHeavyProfile(): RuntimeProfile + +/** Runtime profile preset for open/close behavior. */ +export interface RuntimeProfile { + /** Open-time options for `Database.open(path, options)`. */ + openOptions: OpenOptions + /** Optional close-time checkpoint trigger threshold. */ + closeCheckpointIfWalUsageAtLeast?: number +} + /** Options for opening a database */ export interface OpenOptions { /** Open in read-only mode */ diff --git a/ray-rs/index.js b/ray-rs/index.js index 18c95c3..9d8766d 100644 --- a/ray-rs/index.js +++ b/ray-rs/index.js @@ -597,6 +597,17 @@ module.exports.VectorIndex = nativeBinding.VectorIndex module.exports.backupInfo = nativeBinding.backupInfo module.exports.bruteForceSearch = nativeBinding.bruteForceSearch module.exports.collectMetrics = nativeBinding.collectMetrics +module.exports.collectReplicationLogTransportJson = nativeBinding.collectReplicationLogTransportJson +module.exports.collectReplicationMetricsOtelJson = nativeBinding.collectReplicationMetricsOtelJson +module.exports.collectReplicationMetricsOtelProtobuf = nativeBinding.collectReplicationMetricsOtelProtobuf +module.exports.collectReplicationMetricsPrometheus = nativeBinding.collectReplicationMetricsPrometheus +module.exports.collectReplicationSnapshotTransportJson = nativeBinding.collectReplicationSnapshotTransportJson +module.exports.pushReplicationMetricsOtelJson = nativeBinding.pushReplicationMetricsOtelJson +module.exports.pushReplicationMetricsOtelJsonWithOptions = nativeBinding.pushReplicationMetricsOtelJsonWithOptions +module.exports.pushReplicationMetricsOtelProtobuf = nativeBinding.pushReplicationMetricsOtelProtobuf +module.exports.pushReplicationMetricsOtelProtobufWithOptions = nativeBinding.pushReplicationMetricsOtelProtobufWithOptions +module.exports.pushReplicationMetricsOtelGrpc = 
nativeBinding.pushReplicationMetricsOtelGrpc +module.exports.pushReplicationMetricsOtelGrpcWithOptions = nativeBinding.pushReplicationMetricsOtelGrpcWithOptions module.exports.createBackup = nativeBinding.createBackup module.exports.createOfflineBackup = nativeBinding.createOfflineBackup module.exports.createVectorIndex = nativeBinding.createVectorIndex diff --git a/ray-rs/python/PARITY_MATRIX.md b/ray-rs/python/PARITY_MATRIX.md index c8af6ad..443eb7c 100644 --- a/ray-rs/python/PARITY_MATRIX.md +++ b/ray-rs/python/PARITY_MATRIX.md @@ -62,7 +62,7 @@ Legend: parity = full feature match, partial = similar capability with API or be | Export/Import | `export*`, `import*` | `export*`, `import*` | parity | Python exposes JSON object and file helpers. | | Streaming | `stream*`, `get*Page` | `stream*`, `get*Page` | parity | Same batching/pagination behavior. | | Backup/Restore | `createBackup`, `restoreBackup` | `create_backup`, `restore_backup` | parity | Naming differences only. | -| Metrics/Health | `collectMetrics`, `healthCheck` | `collect_metrics`, `health_check` | parity | Naming differences only. | +| Metrics/Health | `collectMetrics`, `collectReplicationMetricsPrometheus`, `collectReplicationMetricsOtelJson`, `pushReplicationMetricsOtelJson`, `collectReplicationSnapshotTransportJson`, `collectReplicationLogTransportJson`, `healthCheck` | `collect_metrics`, `collect_replication_metrics_prometheus`, `collect_replication_metrics_otel_json`, `push_replication_metrics_otel_json`, `collect_replication_snapshot_transport_json`, `collect_replication_log_transport_json`, `health_check` | parity | Naming differences only. 
| ## Vector Search diff --git a/ray-rs/python/README.md b/ray-rs/python/README.md index 9fa7f8f..c00c206 100644 --- a/ray-rs/python/README.md +++ b/ray-rs/python/README.md @@ -188,6 +188,97 @@ for result in results: print(result.node_id, result.distance) ``` +## Replication admin (low-level API) + +Phase D replication controls are available on `Database`: + +```python +from kitedb import ( + Database, + OpenOptions, + collect_replication_log_transport_json, + collect_replication_metrics_otel_json, + collect_replication_metrics_prometheus, + collect_replication_snapshot_transport_json, + push_replication_metrics_otel_json, +) + +primary = Database( + "cluster-primary.kitedb", + OpenOptions( + replication_role="primary", + replication_sidecar_path="./cluster-primary.sidecar", + replication_segment_max_bytes=64 * 1024 * 1024, + replication_retention_min_entries=1024, + ), +) + +primary.begin() +primary.create_node("n:1") +token = primary.commit_with_token() + +primary.primary_report_replica_progress("replica-a", 1, 42) +pruned_segments, retained_floor = primary.primary_run_retention() +primary_status = primary.primary_replication_status() + +replica = Database( + "cluster-replica.kitedb", + OpenOptions( + replication_role="replica", + replication_sidecar_path="./cluster-replica.sidecar", + replication_source_db_path="cluster-primary.kitedb", + replication_source_sidecar_path="./cluster-primary.sidecar", + ), +) + +replica.replica_bootstrap_from_snapshot() +replica.replica_catch_up_once(256) +if token: + replica.wait_for_token(token, 2000) +replica_status = replica.replica_replication_status() +if replica_status and replica_status["needs_reseed"]: + replica.replica_reseed_from_snapshot() + +prometheus = collect_replication_metrics_prometheus(primary) +print(prometheus) + +otel_json = collect_replication_metrics_otel_json(primary) +print(otel_json) + +status_code, response_body = push_replication_metrics_otel_json( + primary, + "http://127.0.0.1:4318/v1/metrics", + 
timeout_ms=5000, +) +print(status_code, response_body) + +secure_status, secure_body = push_replication_metrics_otel_json( + primary, + "https://collector.internal:4318/v1/metrics", + timeout_ms=5000, + https_only=True, + ca_cert_pem_path="./tls/collector-ca.pem", + client_cert_pem_path="./tls/client.pem", + client_key_pem_path="./tls/client-key.pem", +) +print(secure_status, secure_body) + +snapshot_json = collect_replication_snapshot_transport_json(primary, include_data=False) +print(snapshot_json) + +log_json = collect_replication_log_transport_json( + primary, + cursor=None, + max_frames=128, + max_bytes=1024 * 1024, + include_payload=False, +) +print(log_json) + +replica.close() +primary.close() +``` + ## Documentation ```text diff --git a/ray-rs/python/benchmarks/benchmark_raydb_vs_memgraph.py b/ray-rs/python/benchmarks/benchmark_raydb_vs_memgraph.py new file mode 100644 index 0000000..c799cd4 --- /dev/null +++ b/ray-rs/python/benchmarks/benchmark_raydb_vs_memgraph.py @@ -0,0 +1,419 @@ +#!/usr/bin/env python3 +""" +RayDB vs Memgraph 1-hop traversal benchmark. + +Workload: + - Build identical graph shape in both databases + - 10k nodes, 20k edges (defaults) + - Query equivalent to `db.from(alice).out(Knows).toArray()` + - Alice fan-out defaults to 10 (inside the requested 5-20 range) + +Prerequisites: + - RayDB python bindings installed (`maturin develop --features python`) + - Memgraph running and reachable via Bolt + - Neo4j python driver installed (`pip install neo4j`) +""" + +from __future__ import annotations + +import argparse +import random +import shutil +import sys +import tempfile +import time +from dataclasses import dataclass +from pathlib import Path +from typing import List, Optional, Sequence, Tuple + +sys.path.insert(0, str(Path(__file__).parent.parent)) + +try: + from kitedb import Database, define_edge, define_node, kite +except ImportError: + print("Error: kitedb module not found. 
Build the Python bindings first:") + print(" maturin develop --features python") + sys.exit(1) + +try: + from neo4j import GraphDatabase +except ImportError: + print("Error: neo4j driver not found. Install it with:") + print(" pip install neo4j") + sys.exit(1) + + +@dataclass +class BenchConfig: + nodes: int + edges: int + query_results: int + iterations: int + warmup: int + seed: int + batch_size: int + memgraph_uri: str + memgraph_user: str + memgraph_password: str + memgraph_database: Optional[str] + keep_raydb: bool + + +@dataclass +class LatencyStats: + count: int + min_ns: int + max_ns: int + sum_ns: int + p50_ns: int + p95_ns: int + p99_ns: int + + @property + def ops_per_sec(self) -> float: + if self.sum_ns <= 0: + return 0.0 + return self.count / (self.sum_ns / 1_000_000_000.0) + + +class LatencyTracker: + def __init__(self): + self.samples_ns: List[int] = [] + + def record(self, latency_ns: int): + self.samples_ns.append(latency_ns) + + def stats(self) -> LatencyStats: + if not self.samples_ns: + return LatencyStats(0, 0, 0, 0, 0, 0, 0) + + sorted_samples = sorted(self.samples_ns) + count = len(sorted_samples) + return LatencyStats( + count=count, + min_ns=sorted_samples[0], + max_ns=sorted_samples[-1], + sum_ns=sum(sorted_samples), + p50_ns=sorted_samples[int(count * 0.50)], + p95_ns=sorted_samples[int(count * 0.95)], + p99_ns=sorted_samples[int(count * 0.99)], + ) + + +def parse_args() -> BenchConfig: + parser = argparse.ArgumentParser(description="RayDB vs Memgraph traversal benchmark") + parser.add_argument("--nodes", type=int, default=10_000) + parser.add_argument("--edges", type=int, default=20_000) + parser.add_argument( + "--query-results", + type=int, + default=10, + help="Exact outgoing neighbors from alice in generated graph", + ) + parser.add_argument("--iterations", type=int, default=5_000) + parser.add_argument("--warmup", type=int, default=500) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--batch-size", 
type=int, default=1_000) + parser.add_argument("--memgraph-uri", type=str, default="bolt://127.0.0.1:7687") + parser.add_argument("--memgraph-user", type=str, default="") + parser.add_argument("--memgraph-password", type=str, default="") + parser.add_argument("--memgraph-database", type=str, default="") + parser.add_argument("--keep-raydb", action="store_true") + + args = parser.parse_args() + + if args.nodes < 2: + raise ValueError("--nodes must be >= 2") + if args.edges < 1: + raise ValueError("--edges must be >= 1") + if args.query_results < 1: + raise ValueError("--query-results must be >= 1") + if args.query_results >= args.nodes: + raise ValueError("--query-results must be < --nodes") + if args.query_results > args.edges: + raise ValueError("--query-results must be <= --edges") + if args.iterations < 1: + raise ValueError("--iterations must be >= 1") + if args.warmup < 0: + raise ValueError("--warmup must be >= 0") + if args.batch_size < 1: + raise ValueError("--batch-size must be >= 1") + + return BenchConfig( + nodes=args.nodes, + edges=args.edges, + query_results=args.query_results, + iterations=args.iterations, + warmup=args.warmup, + seed=args.seed, + batch_size=args.batch_size, + memgraph_uri=args.memgraph_uri, + memgraph_user=args.memgraph_user, + memgraph_password=args.memgraph_password, + memgraph_database=args.memgraph_database or None, + keep_raydb=args.keep_raydb, + ) + + +def format_latency(ns: int) -> str: + if ns < 1_000: + return f"{ns}ns" + if ns < 1_000_000: + return f"{ns / 1_000.0:.2f}us" + return f"{ns / 1_000_000.0:.2f}ms" + + +def format_number(value: int) -> str: + return f"{value:,}" + + +def build_workload( + nodes: int, + edges: int, + query_results: int, + seed: int, +) -> Tuple[List[str], List[Tuple[int, int]]]: + keys = ["user:alice"] + [f"user:u{i}" for i in range(1, nodes)] + + edge_set: set[Tuple[int, int]] = set() + + # Guarantee exact fan-out from alice (node 0) for query sanity. 
+ for dst in range(1, query_results + 1): + edge_set.add((0, dst)) + + rng = random.Random(seed) + while len(edge_set) < edges: + src = rng.randrange(1, nodes) # keep alice fan-out stable + dst = rng.randrange(0, nodes) + if src == dst: + continue + edge_set.add((src, dst)) + + return keys, list(edge_set) + + +def ingest_raydb( + raydb_path: str, + keys: Sequence[str], + edges: Sequence[Tuple[int, int]], + batch_size: int, +) -> float: + started = time.perf_counter_ns() + db = Database(raydb_path) + try: + etype = db.get_or_create_etype("knows") + node_ids: List[int] = [] + + for offset in range(0, len(keys), batch_size): + db.begin_bulk() + batch_keys = keys[offset : offset + batch_size] + batch_ids = db.create_nodes_batch(list(batch_keys)) + node_ids.extend(batch_ids) + db.commit() + + for offset in range(0, len(edges), batch_size): + db.begin_bulk() + batch_edges = [] + for src_index, dst_index in edges[offset : offset + batch_size]: + batch_edges.append((node_ids[src_index], etype, node_ids[dst_index])) + db.add_edges_batch(batch_edges) + db.commit() + finally: + db.close() + + return (time.perf_counter_ns() - started) / 1_000_000.0 + + +def benchmark_raydb_query( + raydb_path: str, + iterations: int, + warmup: int, +) -> Tuple[LatencyStats, int]: + user = define_node( + "user", + key=lambda key: f"user:{key}", + props={}, + ) + knows = define_edge("knows", {}) + + tracker = LatencyTracker() + result_len = 0 + + with kite(raydb_path, nodes=[user], edges=[knows]) as db: + alice = db.get_ref(user, "alice") + + for _ in range(warmup): + db.from_(alice).out(knows).to_list() + + for _ in range(iterations): + start = time.perf_counter_ns() + result = db.from_(alice).out(knows).to_list() + tracker.record(time.perf_counter_ns() - start) + result_len = len(result) + + return tracker.stats(), result_len + + +def new_memgraph_driver(config: BenchConfig): + auth = None + if config.memgraph_user or config.memgraph_password: + auth = (config.memgraph_user, 
config.memgraph_password) + return GraphDatabase.driver(config.memgraph_uri, auth=auth) + + +def session_for(driver, database: Optional[str]): + if database: + return driver.session(database=database) + return driver.session() + + +def ingest_memgraph( + driver, + keys: Sequence[str], + edges: Sequence[Tuple[int, int]], + batch_size: int, + database: Optional[str], +) -> float: + started = time.perf_counter_ns() + + with session_for(driver, database) as session: + session.run("MATCH (n) DETACH DELETE n").consume() + + try: + session.run("CREATE INDEX ON :User(key)").consume() + except Exception: + # Index may already exist (from previous runs). + pass + + for offset in range(0, len(keys), batch_size): + rows = [{"key": key} for key in keys[offset : offset + batch_size]] + session.run( + "UNWIND $rows AS row CREATE (:User {key: row.key})", + rows=rows, + ).consume() + + for offset in range(0, len(edges), batch_size): + rows = [] + for src_index, dst_index in edges[offset : offset + batch_size]: + rows.append({"src": keys[src_index], "dst": keys[dst_index]}) + session.run( + """ + UNWIND $rows AS row + MATCH (s:User {key: row.src}) + MATCH (d:User {key: row.dst}) + CREATE (s)-[:KNOWS]->(d) + """, + rows=rows, + ).consume() + + return (time.perf_counter_ns() - started) / 1_000_000.0 + + +def benchmark_memgraph_query( + driver, + iterations: int, + warmup: int, + database: Optional[str], +) -> Tuple[LatencyStats, int]: + tracker = LatencyTracker() + result_len = 0 + query = "MATCH (a:User {key: $key})-[:KNOWS]->(b) RETURN b.key AS key" + + with session_for(driver, database) as session: + for _ in range(warmup): + list(session.run(query, key="user:alice")) + + for _ in range(iterations): + start = time.perf_counter_ns() + rows = list(session.run(query, key="user:alice")) + tracker.record(time.perf_counter_ns() - start) + result_len = len(rows) + + return tracker.stats(), result_len + + +def print_stats(label: str, stats: LatencyStats): + print( + f"{label:<10} 
p50={format_latency(stats.p50_ns):>10} " + f"p95={format_latency(stats.p95_ns):>10} " + f"p99={format_latency(stats.p99_ns):>10} " + f"max={format_latency(stats.max_ns):>10} " + f"({format_number(int(stats.ops_per_sec))} ops/sec)" + ) + + +def main(): + config = parse_args() + keys, edges = build_workload( + nodes=config.nodes, + edges=config.edges, + query_results=config.query_results, + seed=config.seed, + ) + + raydb_dir = tempfile.mkdtemp(prefix="raydb-vs-memgraph-") + raydb_path = str(Path(raydb_dir) / "benchmark.kitedb") + + print("RayDB vs Memgraph: 1-hop traversal benchmark") + print(f"Nodes: {format_number(config.nodes)}") + print(f"Edges: {format_number(config.edges)}") + print(f"Alice expected results: {config.query_results}") + print(f"Iterations: {format_number(config.iterations)} (warmup {format_number(config.warmup)})") + print("") + + try: + raydb_ingest_ms = ingest_raydb(raydb_path, keys, edges, config.batch_size) + driver = new_memgraph_driver(config) + try: + memgraph_ingest_ms = ingest_memgraph( + driver, + keys, + edges, + config.batch_size, + config.memgraph_database, + ) + raydb_stats, raydb_results = benchmark_raydb_query( + raydb_path, + config.iterations, + config.warmup, + ) + memgraph_stats, memgraph_results = benchmark_memgraph_query( + driver, + config.iterations, + config.warmup, + config.memgraph_database, + ) + finally: + driver.close() + + if raydb_results != config.query_results: + raise RuntimeError( + f"RayDB returned {raydb_results} rows, expected {config.query_results}" + ) + if memgraph_results != config.query_results: + raise RuntimeError( + f"Memgraph returned {memgraph_results} rows, expected {config.query_results}" + ) + + print("Setup times (not included in query latency):") + print(f" RayDB ingest: {raydb_ingest_ms:.2f}ms") + print(f" Memgraph ingest: {memgraph_ingest_ms:.2f}ms") + print("") + print("Query latency (equivalent to from(alice).out(Knows).toArray):") + print_stats("RayDB", raydb_stats) + 
print_stats("Memgraph", memgraph_stats) + + if raydb_stats.p50_ns > 0: + p50_ratio = memgraph_stats.p50_ns / raydb_stats.p50_ns + p95_ratio = memgraph_stats.p95_ns / raydb_stats.p95_ns if raydb_stats.p95_ns > 0 else 0.0 + print("") + print(f"Memgraph/RayDB ratio: p50={p50_ratio:.2f}x p95={p95_ratio:.2f}x") + finally: + if config.keep_raydb: + print(f"\nRayDB dataset kept at: {raydb_path}") + else: + shutil.rmtree(raydb_dir, ignore_errors=True) + + +if __name__ == "__main__": + main() diff --git a/ray-rs/python/kitedb/__init__.py b/ray-rs/python/kitedb/__init__.py index a56bd64..f86308c 100644 --- a/ray-rs/python/kitedb/__init__.py +++ b/ray-rs/python/kitedb/__init__.py @@ -53,6 +53,7 @@ # Core classes Database, OpenOptions, + RuntimeProfile, SyncMode, SnapshotParseMode, DbStats, @@ -102,7 +103,18 @@ # Functions open_database, + recommended_safe_profile, + recommended_balanced_profile, + recommended_reopen_heavy_profile, collect_metrics, + collect_replication_log_transport_json, + collect_replication_metrics_otel_json, + collect_replication_metrics_otel_protobuf, + collect_replication_metrics_prometheus, + collect_replication_snapshot_transport_json, + push_replication_metrics_otel_grpc, + push_replication_metrics_otel_json, + push_replication_metrics_otel_protobuf, health_check, create_backup, restore_backup, @@ -162,6 +174,17 @@ create_vector_index, ) +from kitedb.replication_auth import ( + AsgiMtlsMatcherOptions, + ReplicationAdminAuthConfig, + ReplicationAdminAuthMode, + authorize_replication_admin_request, + create_asgi_tls_mtls_matcher, + create_replication_admin_authorizer, + is_asgi_tls_client_authorized, + is_replication_admin_authorized, +) + __version__ = version() __all__ = [ @@ -220,6 +243,7 @@ # Core "Database", "OpenOptions", + "RuntimeProfile", "SyncMode", "SnapshotParseMode", "DbStats", @@ -269,7 +293,18 @@ # Functions "open_database", + "recommended_safe_profile", + "recommended_balanced_profile", + "recommended_reopen_heavy_profile", 
"collect_metrics", + "collect_replication_log_transport_json", + "collect_replication_metrics_otel_json", + "collect_replication_metrics_otel_protobuf", + "collect_replication_metrics_prometheus", + "collect_replication_snapshot_transport_json", + "push_replication_metrics_otel_grpc", + "push_replication_metrics_otel_json", + "push_replication_metrics_otel_protobuf", "health_check", "create_backup", "restore_backup", @@ -277,6 +312,16 @@ "create_offline_backup", "version", "brute_force_search", + + # Replication transport auth helpers + "ReplicationAdminAuthMode", + "ReplicationAdminAuthConfig", + "AsgiMtlsMatcherOptions", + "is_replication_admin_authorized", + "authorize_replication_admin_request", + "create_replication_admin_authorizer", + "is_asgi_tls_client_authorized", + "create_asgi_tls_mtls_matcher", # Version "__version__", diff --git a/ray-rs/python/kitedb/_kitedb.pyi b/ray-rs/python/kitedb/_kitedb.pyi index 6f3fa43..0fea172 100644 --- a/ray-rs/python/kitedb/_kitedb.pyi +++ b/ray-rs/python/kitedb/_kitedb.pyi @@ -68,6 +68,11 @@ class SyncMode: @staticmethod def off() -> SyncMode: ... +class RuntimeProfile: + """Preset profile for open/close behavior.""" + open_options: OpenOptions + close_checkpoint_if_wal_usage_at_least: Optional[float] + class DbStats: """Database statistics.""" snapshot_gen: int @@ -355,7 +360,10 @@ class Database: read_only: bool def __init__(self, path: str, options: Optional[OpenOptions] = None) -> None: ... + @staticmethod + def open(path: str, options: Optional[OpenOptions] = None) -> Database: ... def close(self) -> None: ... + def close_with_checkpoint_if_wal_over(self, threshold: float) -> None: ... def __enter__(self) -> Database: ... def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> bool: ... @@ -533,7 +541,120 @@ class Database: ) -> List[int]: ... def open_database(path: str, options: Optional[OpenOptions] = None) -> Database: ... +def recommended_safe_profile() -> RuntimeProfile: ... 
+def recommended_balanced_profile() -> RuntimeProfile: ... +def recommended_reopen_heavy_profile() -> RuntimeProfile: ... def collect_metrics(db: Database) -> DatabaseMetrics: ... +def collect_replication_snapshot_transport_json( + db: Database, + include_data: bool = False, +) -> str: ... +def collect_replication_log_transport_json( + db: Database, + cursor: Optional[str] = None, + max_frames: int = 128, + max_bytes: int = 1048576, + include_payload: bool = True, +) -> str: ... +def collect_replication_metrics_otel_json(db: Database) -> str: ... +def collect_replication_metrics_otel_protobuf(db: Database) -> bytes: ... +def collect_replication_metrics_prometheus(db: Database) -> str: ... +def push_replication_metrics_otel_json( + db: Database, + endpoint: str, + timeout_ms: int = 5000, + bearer_token: Optional[str] = None, + retry_max_attempts: int = 1, + retry_backoff_ms: int = 100, + retry_backoff_max_ms: int = 2000, + retry_jitter_ratio: float = 0.0, + adaptive_retry: bool = False, + adaptive_retry_mode: Optional[str] = None, + adaptive_retry_ewma_alpha: float = 0.3, + circuit_breaker_failure_threshold: int = 0, + circuit_breaker_open_ms: int = 0, + circuit_breaker_half_open_probes: int = 1, + circuit_breaker_state_path: Optional[str] = None, + circuit_breaker_state_url: Optional[str] = None, + circuit_breaker_state_patch: bool = False, + circuit_breaker_state_patch_batch: bool = False, + circuit_breaker_state_patch_batch_max_keys: int = 8, + circuit_breaker_state_patch_merge: bool = False, + circuit_breaker_state_patch_merge_max_keys: int = 32, + circuit_breaker_state_patch_retry_max_attempts: int = 1, + circuit_breaker_state_cas: bool = False, + circuit_breaker_state_lease_id: Optional[str] = None, + circuit_breaker_scope_key: Optional[str] = None, + compression_gzip: bool = False, + https_only: bool = False, + ca_cert_pem_path: Optional[str] = None, + client_cert_pem_path: Optional[str] = None, + client_key_pem_path: Optional[str] = None, +) -> Tuple[int, 
str]: ... +def push_replication_metrics_otel_grpc( + db: Database, + endpoint: str, + timeout_ms: int = 5000, + bearer_token: Optional[str] = None, + retry_max_attempts: int = 1, + retry_backoff_ms: int = 100, + retry_backoff_max_ms: int = 2000, + retry_jitter_ratio: float = 0.0, + adaptive_retry: bool = False, + adaptive_retry_mode: Optional[str] = None, + adaptive_retry_ewma_alpha: float = 0.3, + circuit_breaker_failure_threshold: int = 0, + circuit_breaker_open_ms: int = 0, + circuit_breaker_half_open_probes: int = 1, + circuit_breaker_state_path: Optional[str] = None, + circuit_breaker_state_url: Optional[str] = None, + circuit_breaker_state_patch: bool = False, + circuit_breaker_state_patch_batch: bool = False, + circuit_breaker_state_patch_batch_max_keys: int = 8, + circuit_breaker_state_patch_merge: bool = False, + circuit_breaker_state_patch_merge_max_keys: int = 32, + circuit_breaker_state_patch_retry_max_attempts: int = 1, + circuit_breaker_state_cas: bool = False, + circuit_breaker_state_lease_id: Optional[str] = None, + circuit_breaker_scope_key: Optional[str] = None, + compression_gzip: bool = False, + https_only: bool = False, + ca_cert_pem_path: Optional[str] = None, + client_cert_pem_path: Optional[str] = None, + client_key_pem_path: Optional[str] = None, +) -> Tuple[int, str]: ... 
+def push_replication_metrics_otel_protobuf( + db: Database, + endpoint: str, + timeout_ms: int = 5000, + bearer_token: Optional[str] = None, + retry_max_attempts: int = 1, + retry_backoff_ms: int = 100, + retry_backoff_max_ms: int = 2000, + retry_jitter_ratio: float = 0.0, + adaptive_retry: bool = False, + adaptive_retry_mode: Optional[str] = None, + adaptive_retry_ewma_alpha: float = 0.3, + circuit_breaker_failure_threshold: int = 0, + circuit_breaker_open_ms: int = 0, + circuit_breaker_half_open_probes: int = 1, + circuit_breaker_state_path: Optional[str] = None, + circuit_breaker_state_url: Optional[str] = None, + circuit_breaker_state_patch: bool = False, + circuit_breaker_state_patch_batch: bool = False, + circuit_breaker_state_patch_batch_max_keys: int = 8, + circuit_breaker_state_patch_merge: bool = False, + circuit_breaker_state_patch_merge_max_keys: int = 32, + circuit_breaker_state_patch_retry_max_attempts: int = 1, + circuit_breaker_state_cas: bool = False, + circuit_breaker_state_lease_id: Optional[str] = None, + circuit_breaker_scope_key: Optional[str] = None, + compression_gzip: bool = False, + https_only: bool = False, + ca_cert_pem_path: Optional[str] = None, + client_cert_pem_path: Optional[str] = None, + client_key_pem_path: Optional[str] = None, +) -> Tuple[int, str]: ... def health_check(db: Database) -> HealthCheckResult: ... def create_backup(db: Database, backup_path: str, options: Optional[BackupOptions] = None) -> BackupResult: ... def restore_backup(backup_path: str, restore_path: str, options: Optional[RestoreOptions] = None) -> str: ... diff --git a/ray-rs/python/kitedb/fluent.py b/ray-rs/python/kitedb/fluent.py index 93fe2bf..953102e 100644 --- a/ray-rs/python/kitedb/fluent.py +++ b/ray-rs/python/kitedb/fluent.py @@ -135,6 +135,7 @@ def __init__( nodes: List[NodeDef[Any]], edges: List[EdgeDef], options: Optional[OpenOptions] = None, + close_checkpoint_if_wal_usage_at_least: Optional[float] = 0.2, ): """ Open or create a Kite database. 
@@ -144,8 +145,15 @@ def __init__( nodes: List of node definitions edges: List of edge definitions options: Optional database options + close_checkpoint_if_wal_usage_at_least: + On close, checkpoint if WAL usage >= threshold. Set None to disable. """ self._db = Database(path, options) + self._close_checkpoint_if_wal_usage_at_least = ( + None + if close_checkpoint_if_wal_usage_at_least is None + else max(0.0, min(1.0, float(close_checkpoint_if_wal_usage_at_least))) + ) self._nodes: Dict[str, NodeDef[Any]] = {n.name: n for n in nodes} self._edges: Dict[str, EdgeDef] = {e.name: e for e in edges} self._etype_ids: Dict[EdgeDef, int] = {} @@ -793,7 +801,12 @@ def optimize(self) -> None: def close(self) -> None: """Close the database.""" - self._db.close() + if self._close_checkpoint_if_wal_usage_at_least is None: + self._db.close() + return + self._db.close_with_checkpoint_if_wal_over( + self._close_checkpoint_if_wal_usage_at_least + ) @property def raw(self) -> Database: @@ -914,6 +927,7 @@ def kite( nodes: List[NodeDef[Any]], edges: List[EdgeDef], options: Optional[OpenOptions] = None, + close_checkpoint_if_wal_usage_at_least: Optional[float] = 0.2, ) -> Kite: """ Open or create a KiteDB database. @@ -925,6 +939,8 @@ def kite( nodes: List of node definitions edges: List of edge definitions options: Optional database options + close_checkpoint_if_wal_usage_at_least: + On close, checkpoint if WAL usage >= threshold. Set None to disable. Returns: Kite database instance @@ -951,7 +967,13 @@ def kite( >>> with kite("./my-graph", nodes=[user], edges=[knows]) as db: ... 
alice = db.insert(user).values(key="alice", name="Alice").returning() """ - return Kite(path, nodes=nodes, edges=edges, options=options) + return Kite( + path, + nodes=nodes, + edges=edges, + options=options, + close_checkpoint_if_wal_usage_at_least=close_checkpoint_if_wal_usage_at_least, + ) __all__ = [ diff --git a/ray-rs/python/kitedb/replication_auth.py b/ray-rs/python/kitedb/replication_auth.py new file mode 100644 index 0000000..6d135e9 --- /dev/null +++ b/ray-rs/python/kitedb/replication_auth.py @@ -0,0 +1,196 @@ +"""Replication admin auth helpers for host-runtime adapters.""" + +from __future__ import annotations + +from dataclasses import dataclass +import re +from typing import Any, Callable, Mapping, Optional, Pattern, Union, Literal + +ReplicationAdminAuthMode = Literal[ + "none", + "token", + "mtls", + "token_or_mtls", + "token_and_mtls", +] + + +@dataclass(frozen=True) +class ReplicationAdminAuthConfig: + mode: ReplicationAdminAuthMode = "none" + token: Optional[str] = None + mtls_header: str = "x-forwarded-client-cert" + mtls_subject_regex: Optional[Union[str, Pattern[str]]] = None + mtls_matcher: Optional[Callable[[Any], bool]] = None + + +@dataclass(frozen=True) +class AsgiMtlsMatcherOptions: + require_peer_certificate: bool = False + + +_VALID_REPLICATION_ADMIN_AUTH_MODES = { + "none", + "token", + "mtls", + "token_or_mtls", + "token_and_mtls", +} + + +def _normalize_regex( + value: Optional[Union[str, Pattern[str]]], +) -> Optional[Pattern[str]]: + if value is None: + return None + if isinstance(value, re.Pattern): + return value + return re.compile(value) + + +def _normalize_config(config: ReplicationAdminAuthConfig) -> ReplicationAdminAuthConfig: + mode = (config.mode or "none").strip().lower() + if mode not in _VALID_REPLICATION_ADMIN_AUTH_MODES: + raise ValueError( + f"Invalid replication admin auth mode '{mode}'; expected " + "none|token|mtls|token_or_mtls|token_and_mtls" + ) + token = (config.token or "").strip() or None + if mode in 
{"token", "token_or_mtls", "token_and_mtls"} and not token: + raise ValueError( + f"replication admin auth mode '{mode}' requires a non-empty token" + ) + mtls_header = (config.mtls_header or "").strip().lower() or "x-forwarded-client-cert" + return ReplicationAdminAuthConfig( + mode=mode, # type: ignore[arg-type] + token=token, + mtls_header=mtls_header, + mtls_subject_regex=_normalize_regex(config.mtls_subject_regex), + mtls_matcher=config.mtls_matcher, + ) + + +def _get_header_value(headers: Any, name: str) -> Optional[str]: + if headers is None: + return None + if hasattr(headers, "get"): + direct = headers.get(name) + if direct is None: + direct = headers.get(name.lower()) + if isinstance(direct, str): + trimmed = direct.strip() + if trimmed: + return trimmed + if isinstance(headers, Mapping): + for key, value in headers.items(): + if str(key).lower() != name: + continue + if isinstance(value, str): + trimmed = value.strip() + if trimmed: + return trimmed + return None + + +def _as_bool(value: Any) -> bool: + if isinstance(value, bool): + return value + if isinstance(value, (int, float)): + return value != 0 + if isinstance(value, str): + normalized = value.strip().lower() + return normalized in {"1", "true", "yes", "success"} + return False + + +def _has_peer_certificate(scope: Mapping[str, Any]) -> bool: + tls_extension = scope.get("extensions") + if isinstance(tls_extension, Mapping): + tls = tls_extension.get("tls") + if isinstance(tls, Mapping): + for key in ("client_cert", "peer_cert", "client_cert_chain"): + value = tls.get(key) + if value: + return True + for key in ("client_cert", "peer_cert", "client_cert_chain"): + value = scope.get(key) + if value: + return True + return False + + +def is_asgi_tls_client_authorized( + request: Any, options: Optional[AsgiMtlsMatcherOptions] = None +) -> bool: + scope = getattr(request, "scope", None) + if not isinstance(scope, Mapping): + return False + if _as_bool(scope.get("tls_client_authorized")) or _as_bool( + 
scope.get("client_cert_verified") + ) or _as_bool(scope.get("ssl_client_verify")): + if options and options.require_peer_certificate: + return _has_peer_certificate(scope) + return True + return False + + +def create_asgi_tls_mtls_matcher( + options: Optional[AsgiMtlsMatcherOptions] = None, +) -> Callable[[Any], bool]: + def _matcher(request: Any) -> bool: + return is_asgi_tls_client_authorized(request, options) + + return _matcher + + +def is_replication_admin_authorized( + request: Any, config: ReplicationAdminAuthConfig +) -> bool: + normalized = _normalize_config(config) + headers = getattr(request, "headers", None) + + token_ok = False + if normalized.token: + authorization = _get_header_value(headers, "authorization") + token_ok = authorization == f"Bearer {normalized.token}" + + if normalized.mtls_matcher is not None: + mtls_ok = bool(normalized.mtls_matcher(request)) + else: + mtls_value = _get_header_value(headers, normalized.mtls_header) + mtls_ok = mtls_value is not None + pattern = normalized.mtls_subject_regex + if mtls_ok and pattern is not None: + mtls_ok = bool(pattern.search(mtls_value)) + + if normalized.mode == "none": + return True + if normalized.mode == "token": + return token_ok + if normalized.mode == "mtls": + return mtls_ok + if normalized.mode == "token_or_mtls": + return token_ok or mtls_ok + return token_ok and mtls_ok + + +def authorize_replication_admin_request( + request: Any, config: ReplicationAdminAuthConfig +) -> None: + normalized = _normalize_config(config) + if is_replication_admin_authorized(request, normalized): + return + raise PermissionError( + f"Unauthorized: replication admin auth mode '{normalized.mode}' not satisfied" + ) + + +def create_replication_admin_authorizer( + config: ReplicationAdminAuthConfig, +) -> Callable[[Any], None]: + normalized = _normalize_config(config) + + def _authorizer(request: Any) -> None: + authorize_replication_admin_request(request, normalized) + + return _authorizer diff --git 
a/ray-rs/python/tests/test_database.py b/ray-rs/python/tests/test_database.py index df47f3f..0277bbb 100644 --- a/ray-rs/python/tests/test_database.py +++ b/ray-rs/python/tests/test_database.py @@ -14,6 +14,22 @@ class TestDatabase: """Test database operations.""" + def test_open_static_keeps_connection(self): + """Test Database.open() returns a reusable connection.""" + with tempfile.TemporaryDirectory() as tmpdir: + path = os.path.join(tmpdir, "test.kitedb") + db = Database.open(path) + assert db.is_open + + db.begin() + node_id = db.create_node("user:alice") + db.commit() + + assert db.get_node_by_key("user:alice") == node_id + + db.close() + assert not db.is_open + def test_create_and_close(self): """Test database creation and closing.""" with tempfile.TemporaryDirectory() as tmpdir: diff --git a/ray-rs/python/tests/test_replication_auth.py b/ray-rs/python/tests/test_replication_auth.py new file mode 100644 index 0000000..4481673 --- /dev/null +++ b/ray-rs/python/tests/test_replication_auth.py @@ -0,0 +1,164 @@ +"""Tests for replication admin auth helpers.""" + +from __future__ import annotations + +import importlib.util +from pathlib import Path +import sys +import pytest + +MODULE_PATH = Path(__file__).resolve().parents[1] / "kitedb" / "replication_auth.py" +MODULE_SPEC = importlib.util.spec_from_file_location("replication_auth", MODULE_PATH) +if MODULE_SPEC is None or MODULE_SPEC.loader is None: + raise RuntimeError(f"failed loading replication auth module from {MODULE_PATH}") +MODULE = importlib.util.module_from_spec(MODULE_SPEC) +sys.modules[MODULE_SPEC.name] = MODULE +MODULE_SPEC.loader.exec_module(MODULE) + +AsgiMtlsMatcherOptions = MODULE.AsgiMtlsMatcherOptions +ReplicationAdminAuthConfig = MODULE.ReplicationAdminAuthConfig +authorize_replication_admin_request = MODULE.authorize_replication_admin_request +create_asgi_tls_mtls_matcher = MODULE.create_asgi_tls_mtls_matcher +create_replication_admin_authorizer = MODULE.create_replication_admin_authorizer 
+is_asgi_tls_client_authorized = MODULE.is_asgi_tls_client_authorized +is_replication_admin_authorized = MODULE.is_replication_admin_authorized + + +class FakeRequest: + def __init__(self, headers=None, scope=None): + self.headers = headers or {} + self.scope = scope or {} + + +def test_replication_auth_none_mode_allows_any_request(): + request = FakeRequest() + config = ReplicationAdminAuthConfig(mode="none") + assert is_replication_admin_authorized(request, config) + authorize_replication_admin_request(request, config) + + +def test_replication_auth_token_mode_requires_bearer_token(): + config = ReplicationAdminAuthConfig(mode="token", token="abc123") + assert is_replication_admin_authorized( + FakeRequest(headers={"authorization": "Bearer abc123"}), config + ) + assert not is_replication_admin_authorized( + FakeRequest(headers={"authorization": "Bearer wrong"}), config + ) + + +def test_replication_auth_mtls_mode_supports_header_and_subject_regex(): + config = ReplicationAdminAuthConfig( + mode="mtls", + mtls_header="x-client-cert", + mtls_subject_regex=r"^CN=replication-admin,", + ) + assert is_replication_admin_authorized( + FakeRequest(headers={"x-client-cert": "CN=replication-admin,O=RayDB"}), + config, + ) + assert not is_replication_admin_authorized( + FakeRequest(headers={"x-client-cert": "CN=viewer,O=RayDB"}), + config, + ) + + +def test_replication_auth_token_or_and_modes(): + either = ReplicationAdminAuthConfig( + mode="token_or_mtls", + token="abc123", + mtls_header="x-client-cert", + ) + assert is_replication_admin_authorized( + FakeRequest(headers={"authorization": "Bearer abc123"}), either + ) + assert is_replication_admin_authorized( + FakeRequest(headers={"x-client-cert": "CN=replication-admin,O=RayDB"}), either + ) + assert not is_replication_admin_authorized(FakeRequest(), either) + + both = ReplicationAdminAuthConfig( + mode="token_and_mtls", + token="abc123", + mtls_header="x-client-cert", + ) + assert not is_replication_admin_authorized( + 
FakeRequest(headers={"authorization": "Bearer abc123"}), both + ) + assert not is_replication_admin_authorized( + FakeRequest(headers={"x-client-cert": "CN=replication-admin,O=RayDB"}), both + ) + assert is_replication_admin_authorized( + FakeRequest( + headers={ + "authorization": "Bearer abc123", + "x-client-cert": "CN=replication-admin,O=RayDB", + } + ), + both, + ) + + +def test_replication_auth_supports_custom_matcher_hook(): + request_ok = FakeRequest(scope={"tls_client_authorized": True}) + request_no = FakeRequest(scope={"tls_client_authorized": False}) + config = ReplicationAdminAuthConfig( + mode="mtls", + mtls_matcher=lambda request: bool(request.scope.get("tls_client_authorized")), + ) + assert is_replication_admin_authorized(request_ok, config) + assert not is_replication_admin_authorized(request_no, config) + + +def test_replication_auth_authorizer_rejects_invalid_config_and_unauthorized(): + with pytest.raises(ValueError, match="non-empty token"): + create_replication_admin_authorizer( + ReplicationAdminAuthConfig(mode="token", token=" ") + ) + + require_admin = create_replication_admin_authorizer( + ReplicationAdminAuthConfig(mode="token", token="abc123") + ) + with pytest.raises(PermissionError, match="not satisfied"): + require_admin(FakeRequest(headers={"authorization": "Bearer wrong"})) + + +def test_asgi_tls_client_authorized_helper_checks_scope_flags(): + assert is_asgi_tls_client_authorized( + FakeRequest(scope={"tls_client_authorized": True}) + ) + assert is_asgi_tls_client_authorized( + FakeRequest(scope={"client_cert_verified": True}) + ) + assert is_asgi_tls_client_authorized( + FakeRequest(scope={"ssl_client_verify": "SUCCESS"}) + ) + assert not is_asgi_tls_client_authorized(FakeRequest(scope={"ssl_client_verify": "FAILED"})) + + +def test_asgi_tls_client_authorized_helper_optionally_requires_peer_certificate(): + options = AsgiMtlsMatcherOptions(require_peer_certificate=True) + with_peer_cert = FakeRequest( + scope={ + 
"tls_client_authorized": True, + "extensions": {"tls": {"client_cert_chain": ["cert"]}}, + } + ) + without_peer_cert = FakeRequest(scope={"tls_client_authorized": True}) + assert is_asgi_tls_client_authorized(with_peer_cert, options) + assert not is_asgi_tls_client_authorized(without_peer_cert, options) + + +def test_create_asgi_tls_mtls_matcher_factory(): + matcher = create_asgi_tls_mtls_matcher( + AsgiMtlsMatcherOptions(require_peer_certificate=True) + ) + assert matcher( + FakeRequest( + scope={ + "tls_client_authorized": True, + "extensions": {"tls": {"client_cert": "cert"}}, + } + ) + ) + assert not matcher(FakeRequest(scope={"tls_client_authorized": True})) diff --git a/ray-rs/python/tests/test_replication_transport_flow.py b/ray-rs/python/tests/test_replication_transport_flow.py new file mode 100644 index 0000000..0149420 --- /dev/null +++ b/ray-rs/python/tests/test_replication_transport_flow.py @@ -0,0 +1,191 @@ +"""End-to-end replication transport/admin flow validation for Python bindings.""" + +from __future__ import annotations + +import importlib.util +import json +import os +from pathlib import Path +import sys +import tempfile + +import pytest + +PY_ROOT = Path(__file__).resolve().parents[1] +NATIVE_CANDIDATES = sorted((PY_ROOT / "kitedb").glob("_kitedb*.so")) +if not NATIVE_CANDIDATES: + raise RuntimeError(f"missing native extension under {PY_ROOT / 'kitedb'}") + +NATIVE_PATH = NATIVE_CANDIDATES[0] +NATIVE_SPEC = importlib.util.spec_from_file_location("_kitedb", NATIVE_PATH) +if NATIVE_SPEC is None or NATIVE_SPEC.loader is None: + raise RuntimeError(f"failed loading native module from {NATIVE_PATH}") +NATIVE = importlib.util.module_from_spec(NATIVE_SPEC) +sys.modules[NATIVE_SPEC.name] = NATIVE +NATIVE_SPEC.loader.exec_module(NATIVE) + +AUTH_PATH = PY_ROOT / "kitedb" / "replication_auth.py" +AUTH_SPEC = importlib.util.spec_from_file_location("kitedb_replication_auth", AUTH_PATH) +if AUTH_SPEC is None or AUTH_SPEC.loader is None: + raise 
RuntimeError(f"failed loading replication auth module from {AUTH_PATH}") +AUTH = importlib.util.module_from_spec(AUTH_SPEC) +sys.modules[AUTH_SPEC.name] = AUTH +AUTH_SPEC.loader.exec_module(AUTH) + +Database = NATIVE.Database +OpenOptions = NATIVE.OpenOptions +collect_replication_snapshot_transport_json = NATIVE.collect_replication_snapshot_transport_json +collect_replication_log_transport_json = NATIVE.collect_replication_log_transport_json +collect_replication_metrics_prometheus = NATIVE.collect_replication_metrics_prometheus + +ReplicationAdminAuthConfig = AUTH.ReplicationAdminAuthConfig +create_replication_admin_authorizer = AUTH.create_replication_admin_authorizer + + +class FakeRequest: + def __init__(self, headers: dict[str, str] | None = None): + self.headers = headers or {} + self.scope: dict[str, object] = {} + + +def _drain_replica(replica: object, max_frames: int, max_loops: int = 64) -> None: + for _ in range(max_loops): + applied = replica.replica_catch_up_once(max_frames) + if applied == 0: + return + + +def test_python_replication_transport_admin_flow_roundtrip(): + with tempfile.TemporaryDirectory() as tmpdir: + primary_path = os.path.join(tmpdir, "primary.kitedb") + primary_sidecar = os.path.join(tmpdir, "primary.sidecar") + replica_path = os.path.join(tmpdir, "replica.kitedb") + replica_sidecar = os.path.join(tmpdir, "replica.sidecar") + + primary = Database( + primary_path, + OpenOptions( + replication_role="primary", + replication_sidecar_path=primary_sidecar, + replication_segment_max_bytes=1, + replication_retention_min_entries=1, + auto_checkpoint=False, + ), + ) + stale = Database( + primary_path, + OpenOptions( + replication_role="primary", + replication_sidecar_path=primary_sidecar, + replication_segment_max_bytes=1, + replication_retention_min_entries=1, + auto_checkpoint=False, + ), + ) + replica = Database( + replica_path, + OpenOptions( + replication_role="replica", + replication_sidecar_path=replica_sidecar, + 
replication_source_db_path=primary_path, + replication_source_sidecar_path=primary_sidecar, + auto_checkpoint=False, + ), + ) + + try: + primary.begin(False) + primary.create_node("n:base") + token_base = primary.commit_with_token() + assert token_base.startswith("1:") + + replica.replica_bootstrap_from_snapshot() + replica_status = replica.replica_replication_status() + assert replica_status["needs_reseed"] is False + assert replica_status["applied_log_index"] == 1 + + snapshot = json.loads( + collect_replication_snapshot_transport_json(primary, include_data=False) + ) + snapshot_direct = json.loads( + primary.export_replication_snapshot_transport_json(False) + ) + assert snapshot["epoch"] == snapshot_direct["epoch"] + assert snapshot["head_log_index"] == snapshot_direct["head_log_index"] + + log_page = json.loads( + collect_replication_log_transport_json( + primary, + cursor=None, + max_frames=128, + max_bytes=1024 * 1024, + include_payload=False, + ) + ) + log_page_direct = json.loads( + primary.export_replication_log_transport_json( + None, + 128, + 1024 * 1024, + False, + ) + ) + assert log_page["frame_count"] == log_page_direct["frame_count"] + assert log_page["frame_count"] >= 1 + + prometheus = collect_replication_metrics_prometheus(primary) + assert "kitedb_replication_" in prometheus + + require_admin = create_replication_admin_authorizer( + ReplicationAdminAuthConfig(mode="token", token="secret-token") + ) + require_admin(FakeRequest({"authorization": "Bearer secret-token"})) + with pytest.raises(PermissionError, match="not satisfied"): + require_admin(FakeRequest({"authorization": "Bearer wrong-token"})) + + for i in range(6): + primary.begin(False) + primary.create_node(f"n:lag-{i}") + primary.commit_with_token() + + lag_status = replica.replica_replication_status() + primary.primary_report_replica_progress( + "replica-a", + lag_status["applied_epoch"], + lag_status["applied_log_index"], + ) + primary.primary_run_retention() + + with 
pytest.raises(Exception, match="reseed"): + replica.replica_catch_up_once(64) + assert replica.replica_replication_status()["needs_reseed"] is True + + primary.checkpoint() + replica.replica_reseed_from_snapshot() + assert replica.replica_replication_status()["needs_reseed"] is False + assert replica.count_nodes() == primary.count_nodes() + + before = primary.primary_replication_status()["epoch"] + promoted = primary.primary_promote_to_next_epoch() + assert promoted > before + + stale.begin(False) + stale.create_node("n:stale-write") + with pytest.raises(Exception, match="stale primary"): + stale.commit_with_token() + if stale.has_transaction(): + stale.rollback() + + primary.begin(False) + primary.create_node("n:post-promote") + promoted_token = primary.commit_with_token() + assert promoted_token.startswith(f"{promoted}:") + + assert not replica.wait_for_token(promoted_token, 5) + _drain_replica(replica, 128) + assert replica.wait_for_token(promoted_token, 2000) + assert replica.count_nodes() == primary.count_nodes() + finally: + replica.close() + stale.close() + primary.close() diff --git a/ray-rs/scripts/open-close-non-vector-gate.sh b/ray-rs/scripts/open-close-non-vector-gate.sh new file mode 100755 index 0000000..9fd0f48 --- /dev/null +++ b/ray-rs/scripts/open-close-non-vector-gate.sh @@ -0,0 +1,143 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +OUT_DIR="${OUT_DIR:-$ROOT_DIR/../docs/benchmarks/results}" + +ATTEMPTS="${ATTEMPTS:-1}" +MAX_SMALL_RW_US="${MAX_SMALL_RW_US:-900.0}" +MAX_SMALL_RO_US="${MAX_SMALL_RO_US:-900.0}" +MAX_LARGE_RW_US="${MAX_LARGE_RW_US:-5000.0}" +MAX_LARGE_RO_US="${MAX_LARGE_RO_US:-5000.0}" + +if [[ "$ATTEMPTS" -lt 1 ]]; then + echo "ATTEMPTS must be >= 1" + exit 1 +fi + +mkdir -p "$OUT_DIR" +STAMP="${STAMP:-$(date +%F)}" +LOG_BASE="$OUT_DIR/${STAMP}-open-close-non-vector-gate" +BENCH_FILTER='single_file_open_close/open_close/(rw|ro)/graph_10k_20k$|single_file_open_close_limits/open_close/(rw|ro)/graph_100k_200k$' + +extract_median_us() { + local logfile="$1" + local bench_id="$2" + local line + line="$( + awk -v bench_id="$bench_id" ' + $0 == bench_id { in_block = 1; next } + in_block && $1 == "time:" { print; exit } + ' "$logfile" + )" + if [[ -z "$line" ]]; then + return 1 + fi + + local value unit + value="$(awk '{print $4}' <<<"$line")" + unit="$(awk '{print $5}' <<<"$line")" + unit="${unit//]/}" + + awk -v value="$value" -v unit="$unit" 'BEGIN { + if (unit == "ns") { + printf "%.6f", value / 1000.0 + } else if (unit == "us" || unit == "µs") { + printf "%.6f", value + 0.0 + } else if (unit == "ms") { + printf "%.6f", value * 1000.0 + } else if (unit == "s") { + printf "%.6f", value * 1000000.0 + } else { + exit 1 + } + }' +} + +median() { + printf '%s\n' "$@" | sort -g | awk ' + { + a[NR] = $1 + } + END { + if (NR == 0) { + print "NaN" + } else if (NR % 2 == 1) { + printf "%.6f", a[(NR + 1) / 2] + } else { + printf "%.6f", (a[NR / 2] + a[NR / 2 + 1]) / 2 + } + } + ' +} + +declare -a small_rw_values=() +declare -a small_ro_values=() +declare -a large_rw_values=() +declare -a large_ro_values=() +last_log="" + +echo "== Open/close non-vector gate (attempts: $ATTEMPTS)" +for attempt in $(seq 1 "$ATTEMPTS"); do + if [[ "$ATTEMPTS" -eq 1 ]]; then + logfile="${LOG_BASE}.txt" + else + logfile="${LOG_BASE}.attempt${attempt}.txt" + fi + last_log="$logfile" + + ( + cd "$ROOT_DIR" + 
cargo bench --bench single_file --no-default-features -- "$BENCH_FILTER" >"$logfile" + ) + + small_rw_us="$(extract_median_us "$logfile" "single_file_open_close/open_close/rw/graph_10k_20k")" + small_ro_us="$(extract_median_us "$logfile" "single_file_open_close/open_close/ro/graph_10k_20k")" + large_rw_us="$(extract_median_us "$logfile" "single_file_open_close_limits/open_close/rw/graph_100k_200k")" + large_ro_us="$(extract_median_us "$logfile" "single_file_open_close_limits/open_close/ro/graph_100k_200k")" + + if [[ -z "$small_rw_us" || -z "$small_ro_us" || -z "$large_rw_us" || -z "$large_ro_us" ]]; then + echo "failed: could not parse one or more non-vector open/close medians" + echo "log: $logfile" + exit 1 + fi + + small_rw_values+=("$small_rw_us") + small_ro_values+=("$small_ro_us") + large_rw_values+=("$large_rw_us") + large_ro_values+=("$large_ro_us") + + echo "attempt $attempt/$ATTEMPTS:" + echo " small-rw median_us = $small_rw_us" + echo " small-ro median_us = $small_ro_us" + echo " large-rw median_us = $large_rw_us" + echo " large-ro median_us = $large_ro_us" +done + +median_small_rw="$(median "${small_rw_values[@]}")" +median_small_ro="$(median "${small_ro_values[@]}")" +median_large_rw="$(median "${large_rw_values[@]}")" +median_large_ro="$(median "${large_ro_values[@]}")" + +if [[ "$median_small_rw" == "NaN" || "$median_small_ro" == "NaN" || "$median_large_rw" == "NaN" || "$median_large_ro" == "NaN" ]]; then + echo "failed: no medians captured" + exit 1 +fi + +small_rw_pass="$(awk -v actual="$median_small_rw" -v max="$MAX_SMALL_RW_US" 'BEGIN { if (actual <= max) print "yes"; else print "no" }')" +small_ro_pass="$(awk -v actual="$median_small_ro" -v max="$MAX_SMALL_RO_US" 'BEGIN { if (actual <= max) print "yes"; else print "no" }')" +large_rw_pass="$(awk -v actual="$median_large_rw" -v max="$MAX_LARGE_RW_US" 'BEGIN { if (actual <= max) print "yes"; else print "no" }')" +large_ro_pass="$(awk -v actual="$median_large_ro" -v max="$MAX_LARGE_RO_US" 'BEGIN { 
if (actual <= max) print "yes"; else print "no" }')" + +echo "median small-rw across $ATTEMPTS attempt(s): ${median_small_rw}us (max allowed: ${MAX_SMALL_RW_US}us)" +echo "median small-ro across $ATTEMPTS attempt(s): ${median_small_ro}us (max allowed: ${MAX_SMALL_RO_US}us)" +echo "median large-rw across $ATTEMPTS attempt(s): ${median_large_rw}us (max allowed: ${MAX_LARGE_RW_US}us)" +echo "median large-ro across $ATTEMPTS attempt(s): ${median_large_ro}us (max allowed: ${MAX_LARGE_RO_US}us)" +echo "log: $last_log" + +if [[ "$small_rw_pass" != "yes" || "$small_ro_pass" != "yes" || "$large_rw_pass" != "yes" || "$large_ro_pass" != "yes" ]]; then + echo "failed: open/close non-vector gate not satisfied" + exit 1 +fi + +echo "pass: open/close non-vector gate satisfied" diff --git a/ray-rs/scripts/open-close-vector-gate.sh b/ray-rs/scripts/open-close-vector-gate.sh new file mode 100755 index 0000000..f7597b3 --- /dev/null +++ b/ray-rs/scripts/open-close-vector-gate.sh @@ -0,0 +1,152 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +OUT_DIR="${OUT_DIR:-$ROOT_DIR/../docs/benchmarks/results}" + +ATTEMPTS="${ATTEMPTS:-1}" +MAX_SMALL_RW_RATIO="${MAX_SMALL_RW_RATIO:-5.0}" +MAX_SMALL_RO_RATIO="${MAX_SMALL_RO_RATIO:-5.0}" +MAX_LARGE_RW_RATIO="${MAX_LARGE_RW_RATIO:-2.5}" +MAX_LARGE_RO_RATIO="${MAX_LARGE_RO_RATIO:-2.5}" + +if [[ "$ATTEMPTS" -lt 1 ]]; then + echo "ATTEMPTS must be >= 1" + exit 1 +fi + +mkdir -p "$OUT_DIR" +STAMP="${STAMP:-$(date +%F)}" +LOG_BASE="$OUT_DIR/${STAMP}-open-close-vector-gate" +BENCH_FILTER='single_file_open_close/open_close/(rw|ro)/graph_10k_20k(_vec5k)?$|single_file_open_close_limits/open_close/(rw|ro)/graph_100k_200k(_vec20k)?$' + +extract_median_us() { + local logfile="$1" + local bench_id="$2" + local line + line="$( + awk -v bench_id="$bench_id" ' + $0 == bench_id { in_block = 1; next } + in_block && $1 == "time:" { print; exit } + ' "$logfile" + )" + if [[ -z "$line" ]]; then + return 1 + fi + + local value unit + value="$(awk '{print $4}' <<<"$line")" + unit="$(awk '{print $5}' <<<"$line")" + unit="${unit//]/}" + + awk -v value="$value" -v unit="$unit" 'BEGIN { + if (unit == "ns") { + printf "%.6f", value / 1000.0 + } else if (unit == "us" || unit == "µs") { + printf "%.6f", value + 0.0 + } else if (unit == "ms") { + printf "%.6f", value * 1000.0 + } else if (unit == "s") { + printf "%.6f", value * 1000000.0 + } else { + exit 1 + } + }' +} + +median() { + printf '%s\n' "$@" | sort -g | awk ' + { + a[NR] = $1 + } + END { + if (NR == 0) { + print "NaN" + } else if (NR % 2 == 1) { + printf "%.6f", a[(NR + 1) / 2] + } else { + printf "%.6f", (a[NR / 2] + a[NR / 2 + 1]) / 2 + } + } + ' +} + +declare -a small_rw_ratios=() +declare -a small_ro_ratios=() +declare -a large_rw_ratios=() +declare -a large_ro_ratios=() +last_log="" + +echo "== Open/close vector gate (attempts: $ATTEMPTS)" +for attempt in $(seq 1 "$ATTEMPTS"); do + if [[ "$ATTEMPTS" -eq 1 ]]; then + logfile="${LOG_BASE}.txt" + else + logfile="${LOG_BASE}.attempt${attempt}.txt" + fi + last_log="$logfile" 
+ + ( + cd "$ROOT_DIR" + cargo bench --bench single_file --no-default-features -- "$BENCH_FILTER" >"$logfile" + ) + + small_rw_base_us="$(extract_median_us "$logfile" "single_file_open_close/open_close/rw/graph_10k_20k")" + small_rw_vec_us="$(extract_median_us "$logfile" "single_file_open_close/open_close/rw/graph_10k_20k_vec5k")" + small_ro_base_us="$(extract_median_us "$logfile" "single_file_open_close/open_close/ro/graph_10k_20k")" + small_ro_vec_us="$(extract_median_us "$logfile" "single_file_open_close/open_close/ro/graph_10k_20k_vec5k")" + large_rw_base_us="$(extract_median_us "$logfile" "single_file_open_close_limits/open_close/rw/graph_100k_200k")" + large_rw_vec_us="$(extract_median_us "$logfile" "single_file_open_close_limits/open_close/rw/graph_100k_200k_vec20k")" + large_ro_base_us="$(extract_median_us "$logfile" "single_file_open_close_limits/open_close/ro/graph_100k_200k")" + large_ro_vec_us="$(extract_median_us "$logfile" "single_file_open_close_limits/open_close/ro/graph_100k_200k_vec20k")" + + if [[ -z "$small_rw_base_us" || -z "$small_rw_vec_us" || -z "$small_ro_base_us" || -z "$small_ro_vec_us" || -z "$large_rw_base_us" || -z "$large_rw_vec_us" || -z "$large_ro_base_us" || -z "$large_ro_vec_us" ]]; then + echo "failed: could not parse one or more open/close benchmark medians" + echo "log: $logfile" + exit 1 + fi + + ratio_small_rw="$(awk -v base="$small_rw_base_us" -v vec="$small_rw_vec_us" 'BEGIN { printf "%.6f", vec / base }')" + ratio_small_ro="$(awk -v base="$small_ro_base_us" -v vec="$small_ro_vec_us" 'BEGIN { printf "%.6f", vec / base }')" + ratio_large_rw="$(awk -v base="$large_rw_base_us" -v vec="$large_rw_vec_us" 'BEGIN { printf "%.6f", vec / base }')" + ratio_large_ro="$(awk -v base="$large_ro_base_us" -v vec="$large_ro_vec_us" 'BEGIN { printf "%.6f", vec / base }')" + + small_rw_ratios+=("$ratio_small_rw") + small_ro_ratios+=("$ratio_small_ro") + large_rw_ratios+=("$ratio_large_rw") + large_ro_ratios+=("$ratio_large_ro") + + echo 
"attempt $attempt/$ATTEMPTS:" + echo " small-rw ratio(vec/non-vec) = $ratio_small_rw" + echo " small-ro ratio(vec/non-vec) = $ratio_small_ro" + echo " large-rw ratio(vec/non-vec) = $ratio_large_rw" + echo " large-ro ratio(vec/non-vec) = $ratio_large_ro" +done + +median_small_rw="$(median "${small_rw_ratios[@]}")" +median_small_ro="$(median "${small_ro_ratios[@]}")" +median_large_rw="$(median "${large_rw_ratios[@]}")" +median_large_ro="$(median "${large_ro_ratios[@]}")" + +if [[ "$median_small_rw" == "NaN" || "$median_small_ro" == "NaN" || "$median_large_rw" == "NaN" || "$median_large_ro" == "NaN" ]]; then + echo "failed: no ratios captured" + exit 1 +fi + +small_rw_pass="$(awk -v actual="$median_small_rw" -v max="$MAX_SMALL_RW_RATIO" 'BEGIN { if (actual <= max) print "yes"; else print "no" }')" +small_ro_pass="$(awk -v actual="$median_small_ro" -v max="$MAX_SMALL_RO_RATIO" 'BEGIN { if (actual <= max) print "yes"; else print "no" }')" +large_rw_pass="$(awk -v actual="$median_large_rw" -v max="$MAX_LARGE_RW_RATIO" 'BEGIN { if (actual <= max) print "yes"; else print "no" }')" +large_ro_pass="$(awk -v actual="$median_large_ro" -v max="$MAX_LARGE_RO_RATIO" 'BEGIN { if (actual <= max) print "yes"; else print "no" }')" + +echo "median small-rw ratio across $ATTEMPTS attempt(s): $median_small_rw (max allowed: $MAX_SMALL_RW_RATIO)" +echo "median small-ro ratio across $ATTEMPTS attempt(s): $median_small_ro (max allowed: $MAX_SMALL_RO_RATIO)" +echo "median large-rw ratio across $ATTEMPTS attempt(s): $median_large_rw (max allowed: $MAX_LARGE_RW_RATIO)" +echo "median large-ro ratio across $ATTEMPTS attempt(s): $median_large_ro (max allowed: $MAX_LARGE_RO_RATIO)" +echo "log: $last_log" + +if [[ "$small_rw_pass" != "yes" || "$small_ro_pass" != "yes" || "$large_rw_pass" != "yes" || "$large_ro_pass" != "yes" ]]; then + echo "failed: open/close vector gate not satisfied" + exit 1 +fi + +echo "pass: open/close vector gate satisfied" diff --git a/ray-rs/scripts/release-preflight.sh 
b/ray-rs/scripts/release-preflight.sh new file mode 100755 index 0000000..976e108 --- /dev/null +++ b/ray-rs/scripts/release-preflight.sh @@ -0,0 +1,110 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +RAY_RS_DIR="$ROOT_DIR/ray-rs" + +usage() { + cat < npm next). +routing_re='^(all|js|ts|py|rs|core): ([0-9]+)\.([0-9]+)\.([0-9]+)( .+)?$' + +if [[ "$commit_msg" =~ $release_re ]]; then + channel="${BASH_REMATCH[1]}" + version="${BASH_REMATCH[2]}.${BASH_REMATCH[3]}.${BASH_REMATCH[4]}" + echo "ok: strict release commit message" + echo " channel=$channel version=$version" +elif [[ "$commit_msg" =~ $routing_re ]]; then + channel="${BASH_REMATCH[1]}" + version="${BASH_REMATCH[2]}.${BASH_REMATCH[3]}.${BASH_REMATCH[4]}" + echo "failed: commit message has trailing text; this routes npm to next" + echo " message=$commit_msg" + echo " expected exact format: all|js|ts|py|rs|core: X.Y.Z" + exit 1 +else + echo "failed: commit message does not match release-gate format" + echo " message=$commit_msg" + echo " expected exact format: all|js|ts|py|rs|core: X.Y.Z" + exit 1 +fi + +if [[ -n "$tag" ]]; then + if [[ ! 
"$tag" =~ ^v([0-9]+)\.([0-9]+)\.([0-9]+)$ ]]; then + echo "failed: tag must match vX.Y.Z" + echo " tag=$tag" + exit 1 + fi + + tag_version="${BASH_REMATCH[1]}.${BASH_REMATCH[2]}.${BASH_REMATCH[3]}" + + package_version="$(node -e "const p=require('$RAY_RS_DIR/package.json');process.stdout.write(String(p.version||''))")" + if [[ -z "$package_version" ]]; then + echo "failed: could not read ray-rs/package.json version" + exit 1 + fi + + if [[ "$package_version" != "$tag_version" ]]; then + echo "failed: ray-rs/package.json version does not match tag" + echo " package.json=$package_version" + echo " tag=$tag" + exit 1 + fi + + if [[ "$version" != "$tag_version" ]]; then + echo "failed: commit message version does not match tag" + echo " commit=$version" + echo " tag=$tag" + exit 1 + fi + + echo "ok: tag + package version + commit version aligned" + echo " tag=$tag" +fi + +echo "pass: release preflight checks satisfied" diff --git a/ray-rs/scripts/replication-bench-gate.sh b/ray-rs/scripts/replication-bench-gate.sh new file mode 100755 index 0000000..fcb74e4 --- /dev/null +++ b/ray-rs/scripts/replication-bench-gate.sh @@ -0,0 +1,153 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +OUT_DIR="${OUT_DIR:-$ROOT_DIR/../docs/benchmarks/results}" + +ITERATIONS="${ITERATIONS:-20000}" +NODES="${NODES:-10000}" +EDGES="${EDGES:-0}" +EDGE_TYPES="${EDGE_TYPES:-1}" +EDGE_PROPS="${EDGE_PROPS:-0}" +VECTOR_COUNT="${VECTOR_COUNT:-0}" +SYNC_MODE="${SYNC_MODE:-normal}" +REPLICATION_SEGMENT_MAX_BYTES="${REPLICATION_SEGMENT_MAX_BYTES:-1073741824}" +P95_MAX_RATIO="${P95_MAX_RATIO:-1.30}" +ATTEMPTS="${ATTEMPTS:-7}" + +if [[ "$ITERATIONS" -lt 100 ]]; then + echo "ITERATIONS must be >= 100 (single_file_raw_bench writes run iterations/100 batches)" + exit 1 +fi +if [[ "$ATTEMPTS" -lt 1 ]]; then + echo "ATTEMPTS must be >= 1" + exit 1 +fi + +mkdir -p "$OUT_DIR" +STAMP="${STAMP:-$(date +%F)}" +BASELINE_LOG_BASE="$OUT_DIR/${STAMP}-replication-gate-baseline" +PRIMARY_LOG_BASE="$OUT_DIR/${STAMP}-replication-gate-primary" + +run_bench() { + local logfile="$1" + shift + ( + cd "$ROOT_DIR" + cargo run --release --example single_file_raw_bench --no-default-features -- \ + --nodes "$NODES" \ + --edges "$EDGES" \ + --edge-types "$EDGE_TYPES" \ + --edge-props "$EDGE_PROPS" \ + --vector-count "$VECTOR_COUNT" \ + --iterations "$ITERATIONS" \ + --sync-mode "$SYNC_MODE" \ + --replication-segment-max-bytes "$REPLICATION_SEGMENT_MAX_BYTES" \ + --no-auto-checkpoint \ + "$@" >"$logfile" + ) +} + +extract_batch_write_p95() { + local logfile="$1" + grep "Batch of 100 nodes" "$logfile" | tail -1 | sed -E 's/.*p95= *([^ ]+).*/\1/' +} + +latency_to_ns() { + local token="$1" + awk -v value="$token" 'BEGIN { + if (value ~ /ns$/) { + sub(/ns$/, "", value) + printf "%.0f", value + 0 + exit + } + if (value ~ /us$/) { + sub(/us$/, "", value) + printf "%.0f", (value + 0) * 1000 + exit + } + if (value ~ /ms$/) { + sub(/ms$/, "", value) + printf "%.0f", (value + 0) * 1000000 + exit + } + printf "-1" + }' +} + +declare -a ratios + +echo "== Replication gate: baseline vs primary (attempts: $ATTEMPTS)" +for attempt in $(seq 1 "$ATTEMPTS"); do + if [[ "$ATTEMPTS" -eq 1 ]]; then + 
baseline_log="${BASELINE_LOG_BASE}.txt" + primary_log="${PRIMARY_LOG_BASE}.txt" + else + baseline_log="${BASELINE_LOG_BASE}.attempt${attempt}.txt" + primary_log="${PRIMARY_LOG_BASE}.attempt${attempt}.txt" + fi + + echo "attempt $attempt/$ATTEMPTS: baseline (replication disabled)" + run_bench "$baseline_log" + + echo "attempt $attempt/$ATTEMPTS: primary sidecar enabled" + run_bench "$primary_log" --replication-primary + + baseline_token="$(extract_batch_write_p95 "$baseline_log")" + primary_token="$(extract_batch_write_p95 "$primary_log")" + + if [[ -z "$baseline_token" || -z "$primary_token" ]]; then + echo "failed: could not parse p95 batch write metric from benchmark output" + echo "baseline log: $baseline_log" + echo "primary log: $primary_log" + exit 1 + fi + + baseline_ns="$(latency_to_ns "$baseline_token")" + primary_ns="$(latency_to_ns "$primary_token")" + if [[ "$baseline_ns" -le 0 || "$primary_ns" -le 0 ]]; then + echo "failed: unsupported latency token(s): baseline=$baseline_token primary=$primary_token" + exit 1 + fi + + ratio="$(awk -v base="$baseline_ns" -v primary="$primary_ns" 'BEGIN { printf "%.6f", primary / base }')" + ratios+=("$ratio") + + echo "attempt $attempt/$ATTEMPTS metrics: baseline=$baseline_token ($baseline_ns ns) primary=$primary_token ($primary_ns ns) ratio=$ratio" + echo "logs:" + echo " $baseline_log" + echo " $primary_log" +done + +ratio_count="${#ratios[@]}" +median_ratio="$( + printf '%s\n' "${ratios[@]}" \ + | sort -g \ + | awk '{ + a[NR]=$1 + } + END { + if (NR == 0) { + print "NaN" + } else if (NR % 2 == 1) { + printf "%.6f", a[(NR + 1) / 2] + } else { + printf "%.6f", (a[NR / 2] + a[NR / 2 + 1]) / 2 + } + }' +)" + +if [[ "$median_ratio" == "NaN" ]]; then + echo "failed: no ratios captured" + exit 1 +fi + +pass="$(awk -v ratio="$median_ratio" -v max="$P95_MAX_RATIO" 'BEGIN { if (ratio <= max) print "yes"; else print "no" }')" +echo "median ratio across $ratio_count attempt(s): $median_ratio (max allowed: $P95_MAX_RATIO)" + +if 
[[ "$pass" != "yes" ]]; then + echo "failed: replication-on p95 median ratio exceeded gate" + exit 1 +fi + +echo "pass: replication p95 gate satisfied" diff --git a/ray-rs/scripts/replication-catchup-gate.sh b/ray-rs/scripts/replication-catchup-gate.sh new file mode 100755 index 0000000..d5e8d9b --- /dev/null +++ b/ray-rs/scripts/replication-catchup-gate.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +OUT_DIR="${OUT_DIR:-$ROOT_DIR/../docs/benchmarks/results}" + +SEED_COMMITS="${SEED_COMMITS:-1000}" +BACKLOG_COMMITS="${BACKLOG_COMMITS:-5000}" +MAX_FRAMES="${MAX_FRAMES:-256}" +SYNC_MODE="${SYNC_MODE:-normal}" +SEGMENT_MAX_BYTES="${SEGMENT_MAX_BYTES:-67108864}" +RETENTION_MIN="${RETENTION_MIN:-20000}" +MIN_CATCHUP_FPS="${MIN_CATCHUP_FPS:-2000}" +MIN_THROUGHPUT_RATIO="${MIN_THROUGHPUT_RATIO:-0.09}" +ATTEMPTS="${ATTEMPTS:-3}" + +if [[ "$BACKLOG_COMMITS" -lt 100 ]]; then + echo "BACKLOG_COMMITS must be >= 100 for stable catch-up measurements" + exit 1 +fi +if [[ "$ATTEMPTS" -lt 1 ]]; then + echo "ATTEMPTS must be >= 1" + exit 1 +fi + +mkdir -p "$OUT_DIR" +STAMP="${STAMP:-$(date +%F)}" +LOGFILE_BASE="$OUT_DIR/${STAMP}-replication-catchup-gate" + +best_catchup_fps=0 +best_ratio=0 +best_logfile="" +last_logfile="" +last_catchup_fps="" +last_primary_fps="" +last_ratio="" +last_applied_frames="" + +run_once() { + local logfile="$1" + ( + cd "$ROOT_DIR" + cargo run --release --example replication_catchup_bench --no-default-features -- \ + --seed-commits "$SEED_COMMITS" \ + --backlog-commits "$BACKLOG_COMMITS" \ + --max-frames "$MAX_FRAMES" \ + --sync-mode "$SYNC_MODE" \ + --segment-max-bytes "$SEGMENT_MAX_BYTES" \ + --retention-min "$RETENTION_MIN" >"$logfile" + ) +} + +echo "== Replication catch-up gate (attempts: $ATTEMPTS)" +for attempt in $(seq 1 "$ATTEMPTS"); do + if [[ "$ATTEMPTS" -eq 1 ]]; then + logfile="${LOGFILE_BASE}.txt" + else + logfile="${LOGFILE_BASE}.attempt${attempt}.txt" + fi + + 
run_once "$logfile" + + catchup_fps="$(grep '^catchup_frames_per_sec:' "$logfile" | tail -1 | awk '{print $2}')" + primary_fps="$(grep '^primary_frames_per_sec:' "$logfile" | tail -1 | awk '{print $2}')" + ratio="$(grep '^throughput_ratio:' "$logfile" | tail -1 | awk '{print $2}')" + applied_frames="$(grep '^applied_frames:' "$logfile" | tail -1 | awk '{print $2}')" + + if [[ -z "$catchup_fps" || -z "$primary_fps" || -z "$ratio" || -z "$applied_frames" ]]; then + echo "failed: could not parse catch-up metrics from benchmark output" + echo "log: $logfile" + exit 1 + fi + + last_logfile="$logfile" + last_catchup_fps="$catchup_fps" + last_primary_fps="$primary_fps" + last_ratio="$ratio" + last_applied_frames="$applied_frames" + + if awk -v current="$catchup_fps" -v best="$best_catchup_fps" 'BEGIN { exit !(current > best) }'; then + best_catchup_fps="$catchup_fps" + best_ratio="$ratio" + best_logfile="$logfile" + fi + + fps_pass="$(awk -v actual="$catchup_fps" -v min="$MIN_CATCHUP_FPS" 'BEGIN { if (actual >= min) print "yes"; else print "no" }')" + ratio_pass="$(awk -v actual="$ratio" -v min="$MIN_THROUGHPUT_RATIO" 'BEGIN { if (actual >= min) print "yes"; else print "no" }')" + + echo "attempt $attempt/$ATTEMPTS: applied=$applied_frames primary_fps=$primary_fps catchup_fps=$catchup_fps ratio=$ratio" + + if [[ "$fps_pass" == "yes" && "$ratio_pass" == "yes" ]]; then + echo "pass: replication catch-up throughput gate satisfied" + echo "log:" + echo " $logfile" + exit 0 + fi +done + +echo "failed: catch-up throughput gate did not pass in $ATTEMPTS attempt(s)" +echo "last attempt: applied frames=$last_applied_frames primary frames/sec=$last_primary_fps catchup frames/sec=$last_catchup_fps ratio=$last_ratio" +echo "thresholds: catchup_fps >= $MIN_CATCHUP_FPS, ratio >= $MIN_THROUGHPUT_RATIO" +if [[ -n "$best_logfile" ]]; then + echo "best attempt: catchup_fps=$best_catchup_fps ratio=$best_ratio log=$best_logfile" +fi +echo "last log:" +echo " $last_logfile" +exit 1 diff --git 
a/ray-rs/scripts/replication-perf-gate.sh b/ray-rs/scripts/replication-perf-gate.sh new file mode 100755 index 0000000..b9843eb --- /dev/null +++ b/ray-rs/scripts/replication-perf-gate.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" + +echo "== Replication perf gate: commit overhead" +"$ROOT_DIR/scripts/replication-bench-gate.sh" + +echo +echo "== Replication perf gate: replica catch-up throughput" +"$ROOT_DIR/scripts/replication-catchup-gate.sh" + +echo +echo "pass: all replication perf gates satisfied" diff --git a/ray-rs/scripts/replication-soak-gate.sh b/ray-rs/scripts/replication-soak-gate.sh new file mode 100755 index 0000000..8473127 --- /dev/null +++ b/ray-rs/scripts/replication-soak-gate.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +OUT_DIR="${OUT_DIR:-$ROOT_DIR/../docs/benchmarks/results}" + +REPLICAS="${REPLICAS:-5}" +CYCLES="${CYCLES:-6}" +COMMITS_PER_CYCLE="${COMMITS_PER_CYCLE:-40}" +ACTIVE_REPLICAS="${ACTIVE_REPLICAS:-3}" +CHURN_INTERVAL="${CHURN_INTERVAL:-2}" +PROMOTION_INTERVAL="${PROMOTION_INTERVAL:-3}" +RESEED_CHECK_INTERVAL="${RESEED_CHECK_INTERVAL:-2}" +MAX_FRAMES="${MAX_FRAMES:-128}" +RECOVERY_MAX_LOOPS="${RECOVERY_MAX_LOOPS:-80}" +SEGMENT_MAX_BYTES="${SEGMENT_MAX_BYTES:-1}" +RETENTION_MIN="${RETENTION_MIN:-64}" +SYNC_MODE="${SYNC_MODE:-normal}" +ATTEMPTS="${ATTEMPTS:-1}" + +MAX_ALLOWED_LAG="${MAX_ALLOWED_LAG:-1200}" +MIN_PROMOTIONS="${MIN_PROMOTIONS:-2}" +MIN_RESEEDS="${MIN_RESEEDS:-0}" + +if [[ "$ATTEMPTS" -lt 1 ]]; then + echo "ATTEMPTS must be >= 1" + exit 1 +fi + +mkdir -p "$OUT_DIR" +STAMP="${STAMP:-$(date +%F)}" +LOGFILE_BASE="$OUT_DIR/${STAMP}-replication-soak-gate" + +extract_metric() { + local key="$1" + local file="$2" + grep "^${key}:" "$file" | tail -1 | awk '{print $2}' +} + +echo "== Replication soak gate (attempts: $ATTEMPTS)" +for attempt in $(seq 1 "$ATTEMPTS"); do + if [[ 
"$ATTEMPTS" -eq 1 ]]; then + logfile="${LOGFILE_BASE}.txt" + else + logfile="${LOGFILE_BASE}.attempt${attempt}.txt" + fi + + ( + cd "$ROOT_DIR" + cargo run --release --example replication_soak_bench --no-default-features -- \ + --replicas "$REPLICAS" \ + --cycles "$CYCLES" \ + --commits-per-cycle "$COMMITS_PER_CYCLE" \ + --active-replicas "$ACTIVE_REPLICAS" \ + --churn-interval "$CHURN_INTERVAL" \ + --promotion-interval "$PROMOTION_INTERVAL" \ + --reseed-check-interval "$RESEED_CHECK_INTERVAL" \ + --max-frames "$MAX_FRAMES" \ + --recovery-max-loops "$RECOVERY_MAX_LOOPS" \ + --segment-max-bytes "$SEGMENT_MAX_BYTES" \ + --retention-min "$RETENTION_MIN" \ + --sync-mode "$SYNC_MODE" >"$logfile" + ) + + divergence="$(extract_metric divergence_violations "$logfile")" + promotions="$(extract_metric promotion_count "$logfile")" + stale_fence="$(extract_metric stale_fence_rejections "$logfile")" + reseeds="$(extract_metric reseed_count "$logfile")" + recovery_loops="$(extract_metric max_recovery_loops "$logfile")" + max_lag="$(extract_metric max_observed_lag "$logfile")" + + if [[ -z "$divergence" || -z "$promotions" || -z "$stale_fence" || -z "$reseeds" || -z "$recovery_loops" || -z "$max_lag" ]]; then + echo "failed: could not parse soak metrics" + echo "log: $logfile" + exit 1 + fi + + divergence_pass="no" + stale_pass="no" + promotions_pass="no" + reseed_pass="no" + recovery_pass="no" + lag_pass="no" + + [[ "$divergence" -eq 0 ]] && divergence_pass="yes" + [[ "$stale_fence" -eq "$promotions" ]] && stale_pass="yes" + [[ "$promotions" -ge "$MIN_PROMOTIONS" ]] && promotions_pass="yes" + [[ "$reseeds" -ge "$MIN_RESEEDS" ]] && reseed_pass="yes" + [[ "$recovery_loops" -le "$RECOVERY_MAX_LOOPS" ]] && recovery_pass="yes" + [[ "$max_lag" -le "$MAX_ALLOWED_LAG" ]] && lag_pass="yes" + + echo "attempt $attempt/$ATTEMPTS: divergence=$divergence promotions=$promotions stale_fence=$stale_fence reseeds=$reseeds max_recovery_loops=$recovery_loops max_lag=$max_lag" + + if [[ 
"$divergence_pass" == "yes" && "$stale_pass" == "yes" && "$promotions_pass" == "yes" && "$reseed_pass" == "yes" && "$recovery_pass" == "yes" && "$lag_pass" == "yes" ]]; then + echo "pass: replication soak gate satisfied" + echo "log:" + echo " $logfile" + exit 0 + fi +done + +echo "failed: replication soak gate did not pass in $ATTEMPTS attempt(s)" +echo "thresholds: divergence=0, stale_fence==promotions, promotions>=${MIN_PROMOTIONS}, reseeds>=${MIN_RESEEDS}, max_recovery_loops<=${RECOVERY_MAX_LOOPS}, max_lag<=${MAX_ALLOWED_LAG}" +echo "last log:" +echo " $logfile" +exit 1 diff --git a/ray-rs/scripts/vector-ann-gate.sh b/ray-rs/scripts/vector-ann-gate.sh new file mode 100755 index 0000000..b93d6d1 --- /dev/null +++ b/ray-rs/scripts/vector-ann-gate.sh @@ -0,0 +1,122 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +OUT_DIR="${OUT_DIR:-$ROOT_DIR/../docs/benchmarks/results}" + +ALGORITHM="${ALGORITHM:-ivf_pq}" +RESIDUALS="${RESIDUALS:-false}" +VECTORS="${VECTORS:-20000}" +DIMENSIONS="${DIMENSIONS:-384}" +QUERIES="${QUERIES:-200}" +K="${K:-10}" +N_CLUSTERS="${N_CLUSTERS:-}" +N_PROBE="${N_PROBE:-16}" +PQ_SUBSPACES="${PQ_SUBSPACES:-48}" +PQ_CENTROIDS="${PQ_CENTROIDS:-256}" +SEED="${SEED:-42}" +ATTEMPTS="${ATTEMPTS:-3}" + +MIN_RECALL_AT_K="${MIN_RECALL_AT_K:-0.16}" +MAX_P95_MS="${MAX_P95_MS:-8.0}" + +if [[ "$ATTEMPTS" -lt 1 ]]; then + echo "ATTEMPTS must be >= 1" + exit 1 +fi + +mkdir -p "$OUT_DIR" +STAMP="${STAMP:-$(date +%F)}" +LOG_BASE="$OUT_DIR/${STAMP}-vector-ann-gate" + +declare -a recalls=() +declare -a p95s=() +last_log="" + +run_once() { + local logfile="$1" + local extra_args=() + if [[ -n "$N_CLUSTERS" ]]; then + extra_args+=(--n-clusters "$N_CLUSTERS") + fi + if [[ "$ALGORITHM" == "ivf_pq" ]]; then + extra_args+=(--pq-subspaces "$PQ_SUBSPACES" --pq-centroids "$PQ_CENTROIDS" --residuals "$RESIDUALS") + fi + + ( + cd "$ROOT_DIR" + cargo run --release --no-default-features --example vector_ann_bench -- \ + 
--algorithm "$ALGORITHM" \ + --vectors "$VECTORS" \ + --dimensions "$DIMENSIONS" \ + --queries "$QUERIES" \ + --k "$K" \ + --n-probe "$N_PROBE" \ + --seed "$SEED" \ + "${extra_args[@]}" >"$logfile" + ) +} + +echo "== Vector ANN gate (attempts: $ATTEMPTS)" +for attempt in $(seq 1 "$ATTEMPTS"); do + if [[ "$ATTEMPTS" -eq 1 ]]; then + logfile="${LOG_BASE}.txt" + else + logfile="${LOG_BASE}.attempt${attempt}.txt" + fi + + run_once "$logfile" + last_log="$logfile" + + recall="$(grep '^mean_recall_at_k:' "$logfile" | tail -1 | awk '{print $2}')" + p95="$(grep '^search_p95_ms:' "$logfile" | tail -1 | awk '{print $2}')" + + if [[ -z "$recall" || -z "$p95" ]]; then + echo "failed: could not parse ANN metrics" + echo "log: $logfile" + exit 1 + fi + + recalls+=("$recall") + p95s+=("$p95") + echo "attempt $attempt/$ATTEMPTS: recall_at_k=$recall p95_ms=$p95" +done + +median() { + printf '%s\n' "$@" | sort -g | awk ' + { + a[NR] = $1 + } + END { + if (NR == 0) { + print "NaN" + } else if (NR % 2 == 1) { + printf "%.6f", a[(NR + 1) / 2] + } else { + printf "%.6f", (a[NR / 2] + a[NR / 2 + 1]) / 2 + } + } + ' +} + +median_recall="$(median "${recalls[@]}")" +median_p95="$(median "${p95s[@]}")" + +if [[ "$median_recall" == "NaN" || "$median_p95" == "NaN" ]]; then + echo "failed: no metrics captured" + exit 1 +fi + +recall_pass="$(awk -v actual="$median_recall" -v min="$MIN_RECALL_AT_K" 'BEGIN { if (actual >= min) print "yes"; else print "no" }')" +p95_pass="$(awk -v actual="$median_p95" -v max="$MAX_P95_MS" 'BEGIN { if (actual <= max) print "yes"; else print "no" }')" + +echo "median recall_at_k across $ATTEMPTS attempt(s): $median_recall (min required: $MIN_RECALL_AT_K)" +echo "median p95_ms across $ATTEMPTS attempt(s): $median_p95 (max allowed: $MAX_P95_MS)" +echo "log: $last_log" + +if [[ "$recall_pass" != "yes" || "$p95_pass" != "yes" ]]; then + echo "failed: ANN gate not satisfied" + exit 1 +fi + +echo "pass: ANN gate satisfied" diff --git a/ray-rs/scripts/vector-ann-matrix.sh 
b/ray-rs/scripts/vector-ann-matrix.sh new file mode 100755 index 0000000..8141630 --- /dev/null +++ b/ray-rs/scripts/vector-ann-matrix.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +OUT_DIR="${OUT_DIR:-$ROOT_DIR/../docs/benchmarks/results}" +STAMP="${STAMP:-$(date +%F)}" + +VECTORS="${VECTORS:-20000}" +DIMENSIONS="${DIMENSIONS:-384}" +QUERIES="${QUERIES:-200}" +K="${K:-10}" +N_CLUSTERS="${N_CLUSTERS:-}" +N_PROBES="${N_PROBES:-8 16}" +PQ_SUBSPACES="${PQ_SUBSPACES:-48}" +PQ_CENTROIDS="${PQ_CENTROIDS:-256}" +SEED="${SEED:-42}" + +mkdir -p "$OUT_DIR" +RAW_OUT="$OUT_DIR/${STAMP}-vector-ann-matrix.txt" +CSV_OUT="$OUT_DIR/${STAMP}-vector-ann-matrix.csv" + +echo "Vector ANN matrix benchmark" >"$RAW_OUT" +echo "date=${STAMP}" >>"$RAW_OUT" +echo "vectors=${VECTORS} dimensions=${DIMENSIONS} queries=${QUERIES} k=${K}" >>"$RAW_OUT" +echo "n_probes={${N_PROBES}}" >>"$RAW_OUT" +echo "pq_subspaces=${PQ_SUBSPACES} pq_centroids=${PQ_CENTROIDS}" >>"$RAW_OUT" +echo "seed=${SEED}" >>"$RAW_OUT" +echo >>"$RAW_OUT" + +printf "algorithm,residuals,n_probe,build_elapsed_ms,search_p50_ms,search_p95_ms,mean_recall_at_k\n" >"$CSV_OUT" + +run_case() { + local algorithm="$1" + local residuals="$2" + local n_probe="$3" + + local extra_args=() + if [[ -n "$N_CLUSTERS" ]]; then + extra_args+=(--n-clusters "$N_CLUSTERS") + fi + if [[ "$algorithm" == "ivf_pq" ]]; then + extra_args+=(--pq-subspaces "$PQ_SUBSPACES" --pq-centroids "$PQ_CENTROIDS" --residuals "$residuals") + fi + + echo "RUN algorithm=${algorithm} residuals=${residuals} n_probe=${n_probe}" | tee -a "$RAW_OUT" + run_out="$( + cd "$ROOT_DIR" + cargo run --release --no-default-features --example vector_ann_bench -- \ + --algorithm "$algorithm" \ + --vectors "$VECTORS" \ + --dimensions "$DIMENSIONS" \ + --queries "$QUERIES" \ + --k "$K" \ + --n-probe "$n_probe" \ + --seed "$SEED" \ + "${extra_args[@]}" + )" + echo "$run_out" >>"$RAW_OUT" + echo >>"$RAW_OUT" + + 
build_ms="$(echo "$run_out" | rg '^build_elapsed_ms:' | awk '{print $2}')" + p50_ms="$(echo "$run_out" | rg '^search_p50_ms:' | awk '{print $2}')" + p95_ms="$(echo "$run_out" | rg '^search_p95_ms:' | awk '{print $2}')" + recall="$(echo "$run_out" | rg '^mean_recall_at_k:' | awk '{print $2}')" + + printf "%s,%s,%s,%s,%s,%s,%s\n" \ + "$algorithm" \ + "$residuals" \ + "$n_probe" \ + "$build_ms" \ + "$p50_ms" \ + "$p95_ms" \ + "$recall" >>"$CSV_OUT" +} + +for n_probe in $N_PROBES; do + run_case "ivf" "na" "$n_probe" + run_case "ivf_pq" "true" "$n_probe" + run_case "ivf_pq" "false" "$n_probe" +done + +{ + echo "raw_output=${RAW_OUT}" + echo "csv_output=${CSV_OUT}" + echo "SUMMARY (best recall then p95 latency):" + echo "algorithm,residuals,n_probe,build_elapsed_ms,search_p50_ms,search_p95_ms,mean_recall_at_k" + tail -n +2 "$CSV_OUT" | sort -t, -k7,7gr -k6,6g +} | tee -a "$RAW_OUT" diff --git a/ray-rs/scripts/vector-ann-pq-tuning.sh b/ray-rs/scripts/vector-ann-pq-tuning.sh new file mode 100755 index 0000000..90bf525 --- /dev/null +++ b/ray-rs/scripts/vector-ann-pq-tuning.sh @@ -0,0 +1,119 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +OUT_DIR="${OUT_DIR:-$ROOT_DIR/../docs/benchmarks/results}" +STAMP="${STAMP:-$(date +%F)}" + +VECTORS="${VECTORS:-20000}" +DIMENSIONS="${DIMENSIONS:-384}" +QUERIES="${QUERIES:-200}" +K="${K:-10}" +N_CLUSTERS="${N_CLUSTERS:-}" +N_PROBES="${N_PROBES:-8 16}" +PQ_SUBSPACES_SET="${PQ_SUBSPACES_SET:-24 48}" +PQ_CENTROIDS_SET="${PQ_CENTROIDS_SET:-128 256}" +RESIDUALS_SET="${RESIDUALS_SET:-false}" +SEED="${SEED:-42}" + +mkdir -p "$OUT_DIR" +RAW_OUT="$OUT_DIR/${STAMP}-vector-ann-pq-tuning.txt" +CSV_OUT="$OUT_DIR/${STAMP}-vector-ann-pq-tuning.csv" + +echo "Vector ANN PQ tuning benchmark" >"$RAW_OUT" +echo "date=${STAMP}" >>"$RAW_OUT" +echo "vectors=${VECTORS} dimensions=${DIMENSIONS} queries=${QUERIES} k=${K}" >>"$RAW_OUT" +echo "n_probes={${N_PROBES}}" >>"$RAW_OUT" +echo "pq_subspaces_set={${PQ_SUBSPACES_SET}}" >>"$RAW_OUT" +echo "pq_centroids_set={${PQ_CENTROIDS_SET}}" >>"$RAW_OUT" +echo "residuals_set={${RESIDUALS_SET}}" >>"$RAW_OUT" +echo "seed=${SEED}" >>"$RAW_OUT" +echo >>"$RAW_OUT" + +printf "algorithm,residuals,n_probe,pq_subspaces,pq_centroids,build_elapsed_ms,search_p50_ms,search_p95_ms,mean_recall_at_k,recall_ratio_vs_ivf,p95_ratio_vs_ivf\n" >"$CSV_OUT" + +declare -A IVF_BASE_RECALL +declare -A IVF_BASE_P95 + +run_ann() { + local algorithm="$1" + local residuals="$2" + local n_probe="$3" + local pq_subspaces="$4" + local pq_centroids="$5" + + local extra_args=() + if [[ -n "$N_CLUSTERS" ]]; then + extra_args+=(--n-clusters "$N_CLUSTERS") + fi + if [[ "$algorithm" == "ivf_pq" ]]; then + extra_args+=(--pq-subspaces "$pq_subspaces" --pq-centroids "$pq_centroids" --residuals "$residuals") + fi + + ( + cd "$ROOT_DIR" + cargo run --release --no-default-features --example vector_ann_bench -- \ + --algorithm "$algorithm" \ + --vectors "$VECTORS" \ + --dimensions "$DIMENSIONS" \ + --queries "$QUERIES" \ + --k "$K" \ + --n-probe "$n_probe" \ + --seed "$SEED" \ + "${extra_args[@]}" + ) +} + +for n_probe in $N_PROBES; do + echo "RUN baseline algorithm=ivf 
n_probe=${n_probe}" | tee -a "$RAW_OUT" + ivf_out="$(run_ann "ivf" "na" "$n_probe" "na" "na")" + echo "$ivf_out" >>"$RAW_OUT" + echo >>"$RAW_OUT" + + ivf_build="$(echo "$ivf_out" | rg '^build_elapsed_ms:' | awk '{print $2}')" + ivf_p50="$(echo "$ivf_out" | rg '^search_p50_ms:' | awk '{print $2}')" + ivf_p95="$(echo "$ivf_out" | rg '^search_p95_ms:' | awk '{print $2}')" + ivf_recall="$(echo "$ivf_out" | rg '^mean_recall_at_k:' | awk '{print $2}')" + + IVF_BASE_RECALL["$n_probe"]="$ivf_recall" + IVF_BASE_P95["$n_probe"]="$ivf_p95" + + printf "ivf,na,%s,na,na,%s,%s,%s,%s,1.000000,1.000000\n" \ + "$n_probe" "$ivf_build" "$ivf_p50" "$ivf_p95" "$ivf_recall" >>"$CSV_OUT" +done + +for n_probe in $N_PROBES; do + ivf_recall="${IVF_BASE_RECALL[$n_probe]}" + ivf_p95="${IVF_BASE_P95[$n_probe]}" + + for residuals in $RESIDUALS_SET; do + for pq_subspaces in $PQ_SUBSPACES_SET; do + for pq_centroids in $PQ_CENTROIDS_SET; do + echo "RUN algorithm=ivf_pq residuals=${residuals} n_probe=${n_probe} pq_subspaces=${pq_subspaces} pq_centroids=${pq_centroids}" | tee -a "$RAW_OUT" + pq_out="$(run_ann "ivf_pq" "$residuals" "$n_probe" "$pq_subspaces" "$pq_centroids")" + echo "$pq_out" >>"$RAW_OUT" + echo >>"$RAW_OUT" + + pq_build="$(echo "$pq_out" | rg '^build_elapsed_ms:' | awk '{print $2}')" + pq_p50="$(echo "$pq_out" | rg '^search_p50_ms:' | awk '{print $2}')" + pq_p95="$(echo "$pq_out" | rg '^search_p95_ms:' | awk '{print $2}')" + pq_recall="$(echo "$pq_out" | rg '^mean_recall_at_k:' | awk '{print $2}')" + + recall_ratio="$(awk -v pq="$pq_recall" -v ivf="$ivf_recall" 'BEGIN { if (ivf <= 0) { print "0.000000" } else { printf "%.6f", pq / ivf } }')" + p95_ratio="$(awk -v pq="$pq_p95" -v ivf="$ivf_p95" 'BEGIN { if (ivf <= 0) { print "0.000000" } else { printf "%.6f", pq / ivf } }')" + + printf "ivf_pq,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n" \ + "$residuals" "$n_probe" "$pq_subspaces" "$pq_centroids" \ + "$pq_build" "$pq_p50" "$pq_p95" "$pq_recall" "$recall_ratio" "$p95_ratio" >>"$CSV_OUT" + done + 
done + done +done + +{ + echo "raw_output=${RAW_OUT}" + echo "csv_output=${CSV_OUT}" + echo "SUMMARY (best PQ configs by recall_ratio, then p95_ratio):" + echo "algorithm,residuals,n_probe,pq_subspaces,pq_centroids,build_elapsed_ms,search_p50_ms,search_p95_ms,mean_recall_at_k,recall_ratio_vs_ivf,p95_ratio_vs_ivf" + awk -F, 'NR == 1 || $1 == "ivf_pq"' "$CSV_OUT" | tail -n +2 | sort -t, -k3,3n -k10,10gr -k11,11g +} | tee -a "$RAW_OUT" diff --git a/ray-rs/scripts/vector-compaction-matrix.sh b/ray-rs/scripts/vector-compaction-matrix.sh new file mode 100755 index 0000000..f142083 --- /dev/null +++ b/ray-rs/scripts/vector-compaction-matrix.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +OUT_DIR="${OUT_DIR:-$ROOT_DIR/../docs/benchmarks/results}" +STAMP="${STAMP:-$(date +%F)}" + +VECTORS="${VECTORS:-50000}" +DIMENSIONS="${DIMENSIONS:-384}" +FRAGMENT_TARGET_SIZE="${FRAGMENT_TARGET_SIZE:-5000}" +MIN_DELETION_RATIOS="${MIN_DELETION_RATIOS:-0.20 0.30 0.40}" +MAX_FRAGMENTS_SET="${MAX_FRAGMENTS_SET:-2 4 8}" +DELETE_RATIOS="${DELETE_RATIOS:-0.35 0.55}" +MIN_VECTORS_TO_COMPACT="${MIN_VECTORS_TO_COMPACT:-10000}" + +mkdir -p "$OUT_DIR" +RAW_OUT="$OUT_DIR/${STAMP}-vector-compaction-matrix.txt" +CSV_OUT="$OUT_DIR/${STAMP}-vector-compaction-matrix.csv" + +echo "Vector compaction matrix benchmark" >"$RAW_OUT" +echo "date=${STAMP}" >>"$RAW_OUT" +echo "vectors=${VECTORS} dimensions=${DIMENSIONS} fragment_target_size=${FRAGMENT_TARGET_SIZE}" >>"$RAW_OUT" +echo "delete_ratios={${DELETE_RATIOS}}" >>"$RAW_OUT" +echo "min_deletion_ratios={${MIN_DELETION_RATIOS}}" >>"$RAW_OUT" +echo "max_fragments_set={${MAX_FRAGMENTS_SET}}" >>"$RAW_OUT" +echo "min_vectors_to_compact=${MIN_VECTORS_TO_COMPACT}" >>"$RAW_OUT" +echo >>"$RAW_OUT" + +printf 
"delete_ratio,min_deletion_ratio,max_fragments,min_vectors_to_compact,compaction_performed,compaction_elapsed_ms,bytes_before,bytes_after,reclaim_percent,fragments_before,fragments_after\n" >"$CSV_OUT" + +for delete_ratio in $DELETE_RATIOS; do + for min_deletion_ratio in $MIN_DELETION_RATIOS; do + for max_fragments in $MAX_FRAGMENTS_SET; do + echo "RUN delete_ratio=${delete_ratio} min_del=${min_deletion_ratio} max_frag=${max_fragments}" | tee -a "$RAW_OUT" + run_out="$( + cd "$ROOT_DIR" + cargo run --release --no-default-features --example vector_compaction_bench -- \ + --vectors "$VECTORS" \ + --dimensions "$DIMENSIONS" \ + --fragment-target-size "$FRAGMENT_TARGET_SIZE" \ + --delete-ratio "$delete_ratio" \ + --min-deletion-ratio "$min_deletion_ratio" \ + --max-fragments "$max_fragments" \ + --min-vectors-to-compact "$MIN_VECTORS_TO_COMPACT" + )" + echo "$run_out" >>"$RAW_OUT" + echo >>"$RAW_OUT" + + compaction_performed="$(echo "$run_out" | rg '^compaction_performed:' | awk '{print $2}')" + elapsed_ms="$(echo "$run_out" | rg '^compaction_elapsed_ms:' | awk '{print $2}')" + bytes_line="$(echo "$run_out" | rg '^ bytes_used:')" + bytes_before="$(echo "$bytes_line" | awk -F': ' '{print $2}' | awk -F' -> ' '{print $1}' | tr -d ',')" + bytes_after="$(echo "$bytes_line" | awk -F' -> ' '{print $2}' | tr -d ',')" + fragments_line="$(echo "$run_out" | rg '^ fragments_needing_compaction:')" + fragments_before="$(echo "$fragments_line" | awk -F': ' '{print $2}' | awk -F' -> ' '{print $1}')" + fragments_after="$(echo "$fragments_line" | awk -F' -> ' '{print $2}')" + reclaim_percent="$(awk -v b="$bytes_before" -v a="$bytes_after" 'BEGIN { if (b<=0) {print "0.00"} else { printf "%.2f", ((b-a)/b)*100.0 } }')" + + printf "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n" \ + "$delete_ratio" \ + "$min_deletion_ratio" \ + "$max_fragments" \ + "$MIN_VECTORS_TO_COMPACT" \ + "$compaction_performed" \ + "$elapsed_ms" \ + "$bytes_before" \ + "$bytes_after" \ + "$reclaim_percent" \ + 
"$fragments_before" \ + "$fragments_after" >>"$CSV_OUT" + done + done +done + +{ + echo "raw_output=${RAW_OUT}" + echo "csv_output=${CSV_OUT}" + echo "SUMMARY (mean by strategy):" + awk -F, ' + NR > 1 { + key = $2 "," $3 + count[key]++ + elapsed[key] += $6 + reclaim[key] += $9 + compaction[key] += ($5 == "true" ? 1 : 0) + } + END { + print "min_deletion_ratio,max_fragments,runs,mean_compaction_elapsed_ms,mean_reclaim_percent,compaction_performed_ratio" + for (k in count) { + split(k, parts, ",") + printf "%s,%s,%d,%.3f,%.3f,%.3f\n", parts[1], parts[2], count[k], elapsed[k] / count[k], reclaim[k] / count[k], compaction[k] / count[k] + } + } + ' "$CSV_OUT" | { + IFS= read -r header + echo "$header" + sort -t, -k1,1 -k2,2n + } +} | tee -a "$RAW_OUT" diff --git a/ray-rs/src/api/kite.rs b/ray-rs/src/api/kite.rs index 1412edf..6790335 100644 --- a/ray-rs/src/api/kite.rs +++ b/ray-rs/src/api/kite.rs @@ -12,10 +12,12 @@ //! Ported from src/api/kite.ts use crate::core::single_file::{ - close_single_file, is_single_file_path, open_single_file, single_file_extension, FullEdge, - SingleFileDB, SingleFileOpenOptions, SyncMode, + close_single_file, close_single_file_with_options, is_single_file_path, open_single_file, + single_file_extension, FullEdge, SingleFileCloseOptions, SingleFileDB, SingleFileOpenOptions, + SyncMode, }; use crate::error::{KiteError, Result}; +use crate::replication::types::ReplicationRole; use crate::types::*; use std::collections::{HashMap, HashSet}; @@ -565,6 +567,22 @@ pub struct KiteOptions { pub wal_size: Option, /// WAL usage threshold (0.0-1.0) to trigger auto-checkpoint pub checkpoint_threshold: Option, + /// Close-time WAL usage threshold (0.0-1.0) to trigger blocking checkpoint + pub close_checkpoint_if_wal_usage_at_least: Option, + /// Replication role (disabled | primary | replica) + pub replication_role: ReplicationRole, + /// Optional replication sidecar path override + pub replication_sidecar_path: Option, + /// Source primary db path 
(replica role only) + pub replication_source_db_path: Option, + /// Source primary sidecar path override (replica role only) + pub replication_source_sidecar_path: Option, + /// Segment rotation threshold in bytes (primary role only) + pub replication_segment_max_bytes: Option, + /// Minimum retained entries window (primary role only) + pub replication_retention_min_entries: Option, + /// Minimum retained segment age in milliseconds (primary role only) + pub replication_retention_min_ms: Option, } impl KiteOptions { @@ -583,6 +601,14 @@ impl KiteOptions { mvcc_max_chain_depth: None, wal_size: None, checkpoint_threshold: None, + close_checkpoint_if_wal_usage_at_least: Some(0.2), + replication_role: ReplicationRole::Disabled, + replication_sidecar_path: None, + replication_source_db_path: None, + replication_source_sidecar_path: None, + replication_segment_max_bytes: None, + replication_retention_min_entries: None, + replication_retention_min_ms: None, } } @@ -667,6 +693,92 @@ impl KiteOptions { self.checkpoint_threshold = Some(value.clamp(0.0, 1.0)); self } + + /// Set close-time checkpoint threshold (0.0-1.0). + /// + /// When set, `Kite::close()` checkpoints if WAL usage is at or above this threshold. + pub fn close_checkpoint_if_wal_usage_at_least(mut self, value: f64) -> Self { + self.close_checkpoint_if_wal_usage_at_least = Some(value.clamp(0.0, 1.0)); + self + } + + /// Disable close-time checkpointing in `Kite::close()`. 
+ pub fn disable_close_checkpoint(mut self) -> Self { + self.close_checkpoint_if_wal_usage_at_least = None; + self + } + + /// Set replication role (disabled | primary | replica) + pub fn replication_role(mut self, role: ReplicationRole) -> Self { + self.replication_role = role; + self + } + + /// Set replication sidecar path (for primary/replica modes) + pub fn replication_sidecar_path>(mut self, path: P) -> Self { + self.replication_sidecar_path = Some(path.as_ref().to_path_buf()); + self + } + + /// Set replication source db path (replica role only) + pub fn replication_source_db_path>(mut self, path: P) -> Self { + self.replication_source_db_path = Some(path.as_ref().to_path_buf()); + self + } + + /// Set replication source sidecar path (replica role only) + pub fn replication_source_sidecar_path>(mut self, path: P) -> Self { + self.replication_source_sidecar_path = Some(path.as_ref().to_path_buf()); + self + } + + /// Set replication segment rotation threshold in bytes (primary role only) + pub fn replication_segment_max_bytes(mut self, value: u64) -> Self { + self.replication_segment_max_bytes = Some(value); + self + } + + /// Set retention minimum entries to keep when pruning (primary role only) + pub fn replication_retention_min_entries(mut self, value: u64) -> Self { + self.replication_retention_min_entries = Some(value); + self + } + + /// Set retention minimum segment age in milliseconds (primary role only) + pub fn replication_retention_min_ms(mut self, value: u64) -> Self { + self.replication_retention_min_ms = Some(value); + self + } + + /// Recommended conservative profile (durability-first). + pub fn recommended_safe() -> Self { + Self::new() + .sync_mode(SyncMode::Full) + .group_commit_enabled(false) + .checkpoint_threshold(0.5) + } + + /// Recommended balanced profile (good throughput + durability tradeoff). 
+ pub fn recommended_balanced() -> Self { + Self::new() + .sync_mode(SyncMode::Normal) + .group_commit_enabled(true) + .group_commit_window_ms(2) + .wal_size_mb(64) + .checkpoint_threshold(0.5) + } + + /// Recommended profile for reopen-heavy workloads. + /// + /// Uses a smaller WAL and lower auto-checkpoint threshold to cap replay cost on reopen. + pub fn recommended_reopen_heavy() -> Self { + Self::new() + .sync_mode(SyncMode::Normal) + .group_commit_enabled(true) + .group_commit_window_ms(2) + .wal_size_mb(16) + .checkpoint_threshold(0.2) + } } impl Default for KiteOptions { @@ -675,6 +787,59 @@ impl Default for KiteOptions { } } +/// Preset runtime profile flavors for KiteDB. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum KiteRuntimeProfileKind { + /// Durability-first defaults. + Safe, + /// Balanced throughput defaults. + Balanced, + /// Reopen-heavy defaults (lower WAL replay risk). + ReopenHeavy, +} + +/// Runtime profile: open options + optional close policy. +#[derive(Debug, Clone)] +pub struct KiteRuntimeProfile { + /// Open-time options profile. + pub options: KiteOptions, + /// Optional close-time checkpoint trigger threshold. + /// + /// When set, call `Kite::close_with_checkpoint_if_wal_over(threshold)`. 
+ pub close_checkpoint_if_wal_usage_at_least: Option, +} + +impl KiteRuntimeProfile { + pub fn from_kind(kind: KiteRuntimeProfileKind) -> Self { + match kind { + KiteRuntimeProfileKind::Safe => Self::safe(), + KiteRuntimeProfileKind::Balanced => Self::balanced(), + KiteRuntimeProfileKind::ReopenHeavy => Self::reopen_heavy(), + } + } + + pub fn safe() -> Self { + Self { + options: KiteOptions::recommended_safe(), + close_checkpoint_if_wal_usage_at_least: None, + } + } + + pub fn balanced() -> Self { + Self { + options: KiteOptions::recommended_balanced(), + close_checkpoint_if_wal_usage_at_least: None, + } + } + + pub fn reopen_heavy() -> Self { + Self { + options: KiteOptions::recommended_reopen_heavy(), + close_checkpoint_if_wal_usage_at_least: Some(0.2), + } + } +} + /// Convenience helper to open a KiteDB instance. pub fn kite>(path: P, options: KiteOptions) -> Result { Kite::open(path, options) @@ -688,6 +853,8 @@ pub fn kite>(path: P, options: KiteOptions) -> Result { pub struct Kite { /// Underlying database db: SingleFileDB, + /// Close-time checkpoint threshold. 
+ close_checkpoint_if_wal_usage_at_least: Option, /// Node type definitions by name nodes: HashMap, /// Edge type definitions by name @@ -722,13 +889,18 @@ impl Kite { db_path = PathBuf::from(format!("{}{}", path.display(), single_file_extension())); } + let close_checkpoint_if_wal_usage_at_least = options + .close_checkpoint_if_wal_usage_at_least + .map(|value| value.clamp(0.0, 1.0)); + let mut db_options = SingleFileOpenOptions::new() .read_only(options.read_only) .create_if_missing(options.create_if_missing) .sync_mode(options.sync_mode) .group_commit_enabled(options.group_commit_enabled) .group_commit_window_ms(options.group_commit_window_ms) - .mvcc(options.mvcc); + .mvcc(options.mvcc) + .replication_role(options.replication_role); if let Some(v) = options.mvcc_gc_interval_ms { db_options = db_options.mvcc_gc_interval_ms(v); } @@ -744,6 +916,24 @@ impl Kite { if let Some(v) = options.checkpoint_threshold { db_options = db_options.checkpoint_threshold(v); } + if let Some(path) = options.replication_sidecar_path.as_ref() { + db_options = db_options.replication_sidecar_path(path); + } + if let Some(path) = options.replication_source_db_path.as_ref() { + db_options = db_options.replication_source_db_path(path); + } + if let Some(path) = options.replication_source_sidecar_path.as_ref() { + db_options = db_options.replication_source_sidecar_path(path); + } + if let Some(v) = options.replication_segment_max_bytes { + db_options = db_options.replication_segment_max_bytes(v); + } + if let Some(v) = options.replication_retention_min_entries { + db_options = db_options.replication_retention_min_entries(v); + } + if let Some(v) = options.replication_retention_min_ms { + db_options = db_options.replication_retention_min_ms(v); + } let db = open_single_file(&db_path, db_options)?; // Initialize schema in a transaction @@ -784,6 +974,7 @@ impl Kite { Ok(Self { db, + close_checkpoint_if_wal_usage_at_least, nodes, edges, key_prefix_to_node, @@ -2043,7 +2234,24 @@ impl Kite { 
/// Close the database pub fn close(self) -> Result<()> { - close_single_file(self.db) + match self.close_checkpoint_if_wal_usage_at_least { + Some(threshold) => close_single_file_with_options( + self.db, + SingleFileCloseOptions::new().checkpoint_if_wal_usage_at_least(threshold), + ), + None => close_single_file(self.db), + } + } + + /// Close the database and run a blocking checkpoint if WAL usage is above threshold. + /// + /// Use this for reopen-heavy workloads where you want to cap WAL replay cost on next open. + /// Threshold is clamped to [0.0, 1.0]. + pub fn close_with_checkpoint_if_wal_over(self, threshold: f64) -> Result<()> { + close_single_file_with_options( + self.db, + SingleFileCloseOptions::new().checkpoint_if_wal_usage_at_least(threshold), + ) } } @@ -3724,6 +3932,47 @@ mod tests { .edge(authored) } + #[test] + fn test_recommended_kite_options_profiles() { + let safe = KiteOptions::recommended_safe(); + assert_eq!(safe.sync_mode, SyncMode::Full); + assert!(!safe.group_commit_enabled); + assert_eq!(safe.close_checkpoint_if_wal_usage_at_least, Some(0.2)); + + let balanced = KiteOptions::recommended_balanced(); + assert_eq!(balanced.sync_mode, SyncMode::Normal); + assert!(balanced.group_commit_enabled); + assert_eq!(balanced.group_commit_window_ms, 2); + assert_eq!(balanced.wal_size, Some(64 * 1024 * 1024)); + assert_eq!(balanced.checkpoint_threshold, Some(0.5)); + assert_eq!(balanced.close_checkpoint_if_wal_usage_at_least, Some(0.2)); + + let reopen = KiteOptions::recommended_reopen_heavy(); + assert_eq!(reopen.sync_mode, SyncMode::Normal); + assert!(reopen.group_commit_enabled); + assert_eq!(reopen.wal_size, Some(16 * 1024 * 1024)); + assert_eq!(reopen.checkpoint_threshold, Some(0.2)); + assert_eq!(reopen.close_checkpoint_if_wal_usage_at_least, Some(0.2)); + } + + #[test] + fn test_runtime_profile_reopen_heavy_has_close_threshold() { + let profile = KiteRuntimeProfile::from_kind(KiteRuntimeProfileKind::ReopenHeavy); + 
assert_eq!(profile.options.wal_size, Some(16 * 1024 * 1024)); + assert_eq!(profile.close_checkpoint_if_wal_usage_at_least, Some(0.2)); + } + + #[test] + fn test_kite_options_close_checkpoint_threshold_configurable() { + let options = KiteOptions::new() + .close_checkpoint_if_wal_usage_at_least(0.35) + .disable_close_checkpoint(); + assert_eq!(options.close_checkpoint_if_wal_usage_at_least, None); + + let clamped = KiteOptions::new().close_checkpoint_if_wal_usage_at_least(1.5); + assert_eq!(clamped.close_checkpoint_if_wal_usage_at_least, Some(1.0)); + } + #[test] fn test_open_database() { let temp_dir = tempdir().expect("expected value"); @@ -3739,6 +3988,27 @@ mod tests { ray.close().expect("expected value"); } + #[test] + fn test_open_database_primary_replication_options() { + let temp_dir = tempdir().expect("expected value"); + let sidecar_path = temp_dir.path().join("replication-sidecar-custom"); + let options = create_test_schema() + .replication_role(ReplicationRole::Primary) + .replication_sidecar_path(&sidecar_path) + .replication_segment_max_bytes(1024) + .replication_retention_min_entries(2); + + let ray = Kite::open(temp_db_path(&temp_dir), options).expect("expected value"); + let primary = ray.raw().primary_replication_status(); + let replica = ray.raw().replica_replication_status(); + + assert!(primary.is_some()); + assert!(replica.is_none()); + let status = primary.expect("expected primary status"); + assert_eq!(status.role, ReplicationRole::Primary); + assert_eq!(status.sidecar_path, sidecar_path); + } + #[test] fn test_create_and_find_node() { let temp_dir = tempdir().expect("expected value"); diff --git a/ray-rs/src/api/vector_search.rs b/ray-rs/src/api/vector_search.rs index b3b2a71..2b70bcf 100644 --- a/ray-rs/src/api/vector_search.rs +++ b/ray-rs/src/api/vector_search.rs @@ -12,7 +12,8 @@ use crate::types::NodeId; use crate::vector::{ create_vector_store, vector_store_clear, vector_store_delete, vector_store_insert, vector_store_node_vector, 
vector_store_stats, DistanceMetric, IvfConfig, IvfError, IvfIndex, - SearchOptions, VectorManifest, VectorSearchResult, VectorStoreConfig, + IvfPqConfig, IvfPqError, IvfPqIndex, IvfPqSearchOptions, SearchOptions, VectorManifest, + VectorSearchResult, VectorStoreConfig, }; // ============================================================================ @@ -23,6 +24,15 @@ const DEFAULT_CACHE_MAX_SIZE: usize = 10_000; const DEFAULT_TRAINING_THRESHOLD: usize = 1000; const MIN_CLUSTERS: usize = 16; const MAX_CLUSTERS: usize = 1024; +const DEFAULT_PQ_SUBSPACES: usize = 48; +const DEFAULT_PQ_CENTROIDS: usize = 256; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum AnnAlgorithm { + Ivf, + #[default] + IvfPq, +} // ============================================================================ // Types @@ -49,6 +59,14 @@ pub struct VectorIndexOptions { pub training_threshold: usize, /// Maximum node refs to cache for search results (default: 10_000) pub cache_max_size: usize, + /// ANN backend algorithm (default: IVF-PQ) + pub ann_algorithm: AnnAlgorithm, + /// PQ subspaces for IVF-PQ (default: 48) + pub pq_subspaces: usize, + /// PQ centroids per subspace for IVF-PQ (default: 256) + pub pq_centroids: usize, + /// Use residual encoding for IVF-PQ (default: false) + pub pq_residuals: bool, } impl Default for VectorIndexOptions { @@ -63,6 +81,10 @@ impl Default for VectorIndexOptions { n_probe: 10, training_threshold: DEFAULT_TRAINING_THRESHOLD, cache_max_size: DEFAULT_CACHE_MAX_SIZE, + ann_algorithm: AnnAlgorithm::default(), + pq_subspaces: DEFAULT_PQ_SUBSPACES, + pq_centroids: DEFAULT_PQ_CENTROIDS, + pq_residuals: false, } } } @@ -129,6 +151,30 @@ impl VectorIndexOptions { self.cache_max_size = size; self } + + /// Set ANN backend algorithm. + pub fn with_ann_algorithm(mut self, algorithm: AnnAlgorithm) -> Self { + self.ann_algorithm = algorithm; + self + } + + /// Set PQ subspaces for IVF-PQ backend. 
+ pub fn with_pq_subspaces(mut self, subspaces: usize) -> Self { + self.pq_subspaces = subspaces.max(1); + self + } + + /// Set PQ centroids per subspace for IVF-PQ backend. + pub fn with_pq_centroids(mut self, centroids: usize) -> Self { + self.pq_centroids = centroids.max(2); + self + } + + /// Set residual encoding mode for IVF-PQ backend. + pub fn with_pq_residuals(mut self, residuals: bool) -> Self { + self.pq_residuals = residuals; + self + } } /// Options for similarity search @@ -258,11 +304,32 @@ pub struct VectorIndexStats { /// # Ok(()) /// # } /// ``` +enum BuiltIndex { + Ivf(IvfIndex), + IvfPq(IvfPqIndex), +} + +impl BuiltIndex { + fn trained(&self) -> bool { + match self { + BuiltIndex::Ivf(index) => index.trained, + BuiltIndex::IvfPq(index) => index.trained, + } + } + + fn n_clusters(&self) -> usize { + match self { + BuiltIndex::Ivf(index) => index.config.n_clusters, + BuiltIndex::IvfPq(index) => index.config.ivf.n_clusters, + } + } +} + pub struct VectorIndex { /// The underlying vector store manifest manifest: VectorManifest, - /// IVF index for approximate search (None if not trained) - index: Option, + /// ANN index for approximate search (None if not trained) + index: Option, /// Cache of node IDs for quick lookup node_cache: LruCache, /// Node ID to vector ID mapping for cache lookups @@ -317,9 +384,16 @@ impl VectorIndex { // Check if we need to delete from index first if let Some(&existing_vector_id) = self.manifest.node_to_vector.get(&node_id) { if let Some(ref mut index) = self.index { - if index.trained { + if index.trained() { if let Some(existing_vector) = vector_store_node_vector(&self.manifest, node_id) { - index.delete(existing_vector_id, existing_vector); + match index { + BuiltIndex::Ivf(ivf_index) => { + ivf_index.delete(existing_vector_id, existing_vector); + } + BuiltIndex::IvfPq(ivf_pq_index) => { + ivf_pq_index.delete(existing_vector_id, existing_vector); + } + } } } } @@ -335,12 +409,20 @@ impl VectorIndex { // Add to index 
if trained, otherwise mark for training if let Some(ref mut index) = self.index { - if index.trained { + if index.trained() { if let Some(stored_vector) = vector_store_node_vector(&self.manifest, node_id) { - if let Err(err) = index.insert(vector_id as u64, stored_vector) { + let insert_result = match index { + BuiltIndex::Ivf(ivf_index) => ivf_index + .insert(vector_id as u64, stored_vector) + .map_err(ivf_error_to_index_error), + BuiltIndex::IvfPq(ivf_pq_index) => ivf_pq_index + .insert(vector_id as u64, stored_vector) + .map_err(ivf_pq_error_to_index_error), + }; + if let Err(err) = insert_result { self.index = None; self.needs_training = true; - return Err(ivf_error_to_index_error(err)); + return Err(err); } } } else { @@ -369,10 +451,17 @@ impl VectorIndex { // Remove from index if trained if let Some(ref mut index) = self.index { - if index.trained { + if index.trained() { if let Some(&vector_id) = self.manifest.node_to_vector.get(&node_id) { if let Some(vector) = vector_store_node_vector(&self.manifest, node_id) { - index.delete(vector_id, vector); + match index { + BuiltIndex::Ivf(ivf_index) => { + ivf_index.delete(vector_id, vector); + } + BuiltIndex::IvfPq(ivf_pq_index) => { + ivf_pq_index.delete(vector_id, vector); + } + } } } } @@ -392,10 +481,10 @@ impl VectorIndex { self.manifest.node_to_vector.contains_key(&node_id) } - /// Build/rebuild the IVF index for faster search + /// Build/rebuild the configured ANN index for faster search /// /// Call this after bulk loading vectors, or periodically as vectors are updated. - /// Uses k-means clustering for approximate nearest neighbor search. + /// Uses IVF or IVF-PQ based on configured ANN backend. /// /// Note: Modifications (set/delete) are blocked while building is in progress. 
pub fn build_index(&mut self) -> Result<(), VectorIndexError> { @@ -438,32 +527,63 @@ impl VectorIndex { } } - // Create and train the index - let ivf_config = IvfConfig::new(n_clusters) - .with_n_probe(self.options.n_probe) - .with_metric(self.options.metric); - let mut index = IvfIndex::new(dimensions, ivf_config); - - index - .add_training_vectors(&training_data, vector_ids.len()) - .map_err(|e| VectorIndexError::TrainingError(e.to_string()))?; - - index - .train() - .map_err(|e| VectorIndexError::TrainingError(e.to_string()))?; - - // Insert all vectors into the trained index - for (i, &vector_id) in vector_ids.iter().enumerate() { - let offset = i * dimensions; - let vector = &training_data[offset..offset + dimensions]; - if let Err(err) = index.insert(vector_id, vector) { - self.index = None; - self.needs_training = true; - return Err(ivf_error_to_index_error(err)); + // Create and train the configured ANN index. + self.index = Some(match self.options.ann_algorithm { + AnnAlgorithm::Ivf => { + let ivf_config = IvfConfig::new(n_clusters) + .with_n_probe(self.options.n_probe) + .with_metric(self.options.metric); + let mut index = IvfIndex::new(dimensions, ivf_config); + + index + .add_training_vectors(&training_data, vector_ids.len()) + .map_err(|e| VectorIndexError::TrainingError(e.to_string()))?; + + index + .train() + .map_err(|e| VectorIndexError::TrainingError(e.to_string()))?; + + for (i, &vector_id) in vector_ids.iter().enumerate() { + let offset = i * dimensions; + let vector = &training_data[offset..offset + dimensions]; + if let Err(err) = index.insert(vector_id, vector) { + self.index = None; + self.needs_training = true; + return Err(ivf_error_to_index_error(err)); + } + } + BuiltIndex::Ivf(index) } - } - - self.index = Some(index); + AnnAlgorithm::IvfPq => { + let pq_subspaces = resolve_pq_subspaces(self.options.pq_subspaces, dimensions); + let pq_centroids = self.options.pq_centroids.max(2).min(live_vectors.max(2)); + let ivf_pq_config = 
IvfPqConfig::new() + .with_n_clusters(n_clusters) + .with_n_probe(self.options.n_probe) + .with_metric(self.options.metric) + .with_num_subspaces(pq_subspaces) + .with_num_centroids(pq_centroids) + .with_residuals(self.options.pq_residuals); + let mut index = + IvfPqIndex::new(dimensions, ivf_pq_config).map_err(ivf_pq_error_to_index_error)?; + + index + .add_training_vectors(&training_data, vector_ids.len()) + .map_err(ivf_pq_error_to_index_error)?; + index.train().map_err(ivf_pq_error_to_index_error)?; + + for (i, &vector_id) in vector_ids.iter().enumerate() { + let offset = i * dimensions; + let vector = &training_data[offset..offset + dimensions]; + if let Err(err) = index.insert(vector_id, vector) { + self.index = None; + self.needs_training = true; + return Err(ivf_pq_error_to_index_error(err)); + } + } + BuiltIndex::IvfPq(index) + } + }); self.needs_training = false; Ok(()) @@ -472,7 +592,7 @@ impl VectorIndex { /// Search for similar vectors /// /// Returns the k most similar nodes to the query vector. - /// Uses IVF index if available, otherwise falls back to brute force. + /// Uses configured ANN index if available, otherwise falls back to brute force. 
pub fn search( &mut self, query: &[f32], @@ -507,19 +627,33 @@ impl VectorIndex { let n_probe = n_probe.unwrap_or(self.options.n_probe); let results: Vec = if let Some(ref index) = self.index { - if index.trained { - // Use IVF index for approximate search (push down threshold/filter) - let filter_box = filter.as_ref().map(|f| { - let f = Arc::clone(f); - Box::new(move |node_id: NodeId| f(node_id)) as Box bool> - }); - - let search_opts = SearchOptions { - n_probe: Some(n_probe), - filter: filter_box, - threshold, - }; - index.search(&self.manifest, query, k, Some(search_opts)) + if index.trained() { + match index { + BuiltIndex::Ivf(ivf_index) => { + let filter_box = filter.as_ref().map(|f| { + let f = Arc::clone(f); + Box::new(move |node_id: NodeId| f(node_id)) as Box bool> + }); + let search_opts = SearchOptions { + n_probe: Some(n_probe), + filter: filter_box, + threshold, + }; + ivf_index.search(&self.manifest, query, k, Some(search_opts)) + } + BuiltIndex::IvfPq(ivf_pq_index) => { + let filter_box = filter.as_ref().map(|f| { + let f = Arc::clone(f); + Box::new(move |node_id: NodeId| f(node_id)) as Box bool> + }); + let search_opts = IvfPqSearchOptions { + n_probe: Some(n_probe), + filter: filter_box, + threshold, + }; + ivf_pq_index.search(&self.manifest, query, k, Some(search_opts)) + } + } } else { self.brute_force_search_filtered(query, k, threshold, filter.as_ref()) } @@ -658,8 +792,12 @@ impl VectorIndex { live_vectors: store_stats.live_vectors, dimensions: self.options.dimensions, metric: self.options.metric, - index_trained: self.index.as_ref().map(|i| i.trained).unwrap_or(false), - index_clusters: self.index.as_ref().map(|i| i.config.n_clusters), + index_trained: self + .index + .as_ref() + .map(BuiltIndex::trained) + .unwrap_or(false), + index_clusters: self.index.as_ref().map(BuiltIndex::n_clusters), } } @@ -752,6 +890,25 @@ fn ivf_error_to_index_error(err: IvfError) -> VectorIndexError { } } +fn ivf_pq_error_to_index_error(err: IvfPqError) -> 
VectorIndexError { + match err { + IvfPqError::DimensionMismatch { expected, got } => { + VectorIndexError::DimensionMismatch { expected, got } + } + other => VectorIndexError::TrainingError(other.to_string()), + } +} + +fn resolve_pq_subspaces(requested: usize, dimensions: usize) -> usize { + let capped = requested.max(1).min(dimensions.max(1)); + for candidate in (1..=capped).rev() { + if dimensions % candidate == 0 { + return candidate; + } + } + 1 +} + // ============================================================================ // Factory Function // ============================================================================ @@ -795,6 +952,10 @@ mod tests { assert_eq!(opts.metric, DistanceMetric::Cosine); assert!(opts.normalize); assert_eq!(opts.training_threshold, DEFAULT_TRAINING_THRESHOLD); + assert_eq!(opts.ann_algorithm, AnnAlgorithm::IvfPq); + assert_eq!(opts.pq_subspaces, DEFAULT_PQ_SUBSPACES); + assert_eq!(opts.pq_centroids, DEFAULT_PQ_CENTROIDS); + assert!(!opts.pq_residuals); } #[test] @@ -803,13 +964,21 @@ mod tests { .with_metric(DistanceMetric::Euclidean) .with_normalize(false) .with_n_probe(20) - .with_training_threshold(500); + .with_training_threshold(500) + .with_ann_algorithm(AnnAlgorithm::Ivf) + .with_pq_subspaces(32) + .with_pq_centroids(128) + .with_pq_residuals(true); assert_eq!(opts.dimensions, 512); assert_eq!(opts.metric, DistanceMetric::Euclidean); assert!(!opts.normalize); assert_eq!(opts.n_probe, 20); assert_eq!(opts.training_threshold, 500); + assert_eq!(opts.ann_algorithm, AnnAlgorithm::Ivf); + assert_eq!(opts.pq_subspaces, 32); + assert_eq!(opts.pq_centroids, 128); + assert!(opts.pq_residuals); } #[test] diff --git a/ray-rs/src/constants.rs b/ray-rs/src/constants.rs index 1912650..0bd261a 100644 --- a/ray-rs/src/constants.rs +++ b/ray-rs/src/constants.rs @@ -15,13 +15,13 @@ pub const MAGIC_SNAPSHOT: u32 = 0x31534447; // Current versions // ============================================================================ -pub const 
VERSION_SNAPSHOT: u32 = 3; +pub const VERSION_SNAPSHOT: u32 = 4; // ============================================================================ // Minimum reader versions // ============================================================================ -pub const MIN_READER_SNAPSHOT: u32 = 3; +pub const MIN_READER_SNAPSHOT: u32 = 4; // ============================================================================ // Alignment requirements diff --git a/ray-rs/src/core/single_file/checkpoint.rs b/ray-rs/src/core/single_file/checkpoint.rs index dbd6e38..a973b46 100644 --- a/ray-rs/src/core/single_file/checkpoint.rs +++ b/ray-rs/src/core/single_file/checkpoint.rs @@ -13,9 +13,9 @@ use crate::core::snapshot::writer::{ use crate::error::{KiteError, Result}; use crate::types::*; use crate::util::mmap::map_file; -use crate::vector::store::vector_store_node_vector; +use crate::vector::types::VectorManifest; -use super::vector::vector_stores_from_snapshot; +use super::vector::vector_store_state_from_snapshot; use super::{CheckpointStatus, SingleFileDB}; type GraphData = ( @@ -24,6 +24,7 @@ type GraphData = ( HashMap, HashMap, HashMap, + HashMap, ); impl SingleFileDB { @@ -50,7 +51,7 @@ impl SingleFileDB { } // Collect all graph data - let (nodes, edges, labels, etypes, propkeys) = self.collect_graph_data(); + let (nodes, edges, labels, etypes, propkeys, vector_stores) = self.collect_graph_data()?; // Get current header state let header = self.header.read().clone(); @@ -64,6 +65,7 @@ impl SingleFileDB { labels, etypes, propkeys, + vector_stores: Some(vector_stores), compression: self.checkpoint_compression.clone(), })?; @@ -128,6 +130,7 @@ impl SingleFileDB { // No snapshot to load *self.snapshot.write() = None; self.vector_stores.write().clear(); + self.vector_store_lazy_entries.write().clear(); return Ok(()); } @@ -150,8 +153,9 @@ impl SingleFileDB { // Rebuild vector stores from the new snapshot if let Some(ref snapshot) = *self.snapshot.read() { - let stores = 
vector_stores_from_snapshot(snapshot)?; + let (stores, lazy_entries) = vector_store_state_from_snapshot(snapshot)?; *self.vector_stores.write() = stores; + *self.vector_store_lazy_entries.write() = lazy_entries; } Ok(()) @@ -252,7 +256,7 @@ impl SingleFileDB { /// Returns (new_gen, new_snapshot_start_page, new_snapshot_page_count) fn build_and_write_snapshot(&self) -> Result<(u64, u64, u64)> { // Collect all graph data (reads from snapshot + delta) - let (nodes, edges, labels, etypes, propkeys) = self.collect_graph_data(); + let (nodes, edges, labels, etypes, propkeys, vector_stores) = self.collect_graph_data()?; // Get current header state let header = self.header.read().clone(); @@ -266,6 +270,7 @@ impl SingleFileDB { labels, etypes, propkeys, + vector_stores: Some(vector_stores), compression: self.checkpoint_compression.clone(), })?; @@ -414,7 +419,7 @@ impl SingleFileDB { } /// Collect all graph data from snapshot + delta - pub(crate) fn collect_graph_data(&self) -> GraphData { + pub(crate) fn collect_graph_data(&self) -> Result { let mut nodes = Vec::new(); let mut edges = Vec::new(); let mut labels = HashMap::new(); @@ -615,34 +620,28 @@ impl SingleFileDB { } } - // Merge vector embeddings into node props for snapshot persistence - if !self.vector_stores.read().is_empty() { - let mut node_index: HashMap = HashMap::new(); - for (idx, node) in nodes.iter().enumerate() { - node_index.insert(node.node_id, idx); - } - - let stores = self.vector_stores.read(); - for (&prop_key_id, store) in stores.iter() { - for &node_id in store.node_to_vector.keys() { - if delta.is_node_deleted(node_id) { - continue; - } - - let Some(&idx) = node_index.get(&node_id) else { - continue; - }; - - if let Some(vec) = vector_store_node_vector(store, node_id) { - nodes[idx] - .props - .insert(prop_key_id, PropValue::VectorF32(vec.to_vec())); - } - } + // Snapshot persistence now stores ANN vectors only in dedicated + // vector-store sections. 
Remove duplicate vector payloads from node props. + self.materialize_all_vector_stores()?; + let vector_stores_for_snapshot: HashMap = + self.vector_stores.read().clone(); + if !vector_stores_for_snapshot.is_empty() { + for node in &mut nodes { + node.props.retain(|prop_key_id, value| { + !(vector_stores_for_snapshot.contains_key(prop_key_id) + && matches!(value, PropValue::VectorF32(_))) + }); } } - (nodes, edges, labels, etypes, propkeys) + Ok(( + nodes, + edges, + labels, + etypes, + propkeys, + vector_stores_for_snapshot, + )) } /// Check if checkpoint is recommended based on WAL usage diff --git a/ray-rs/src/core/single_file/compactor.rs b/ray-rs/src/core/single_file/compactor.rs index 790d438..1cf81a0 100644 --- a/ray-rs/src/core/single_file/compactor.rs +++ b/ray-rs/src/core/single_file/compactor.rs @@ -76,7 +76,7 @@ impl SingleFileDB { } } - let (nodes, edges, labels, etypes, propkeys) = self.collect_graph_data(); + let (nodes, edges, labels, etypes, propkeys, vector_stores) = self.collect_graph_data()?; let header = self.header.read().clone(); let old_snapshot_start_page = header.snapshot_start_page; @@ -91,6 +91,7 @@ impl SingleFileDB { labels, etypes, propkeys, + vector_stores: Some(vector_stores), compression, })?; diff --git a/ray-rs/src/core/single_file/mod.rs b/ray-rs/src/core/single_file/mod.rs index c8803a8..79424ca 100644 --- a/ray-rs/src/core/single_file/mod.rs +++ b/ray-rs/src/core/single_file/mod.rs @@ -12,6 +12,7 @@ use std::thread::ThreadId; use parking_lot::{Condvar, Mutex, RwLock}; +use self::vector::VectorStoreLazyEntry; use crate::cache::manager::CacheManager; use crate::constants::*; use crate::core::pager::FilePager; @@ -31,6 +32,7 @@ mod iter; mod open; mod read; mod recovery; +mod replication; mod schema; mod transaction; mod vector; @@ -43,7 +45,8 @@ mod stress; pub use compactor::{ResizeWalOptions, SingleFileOptimizeOptions, VacuumOptions}; pub use iter::*; pub use open::{ - close_single_file, open_single_file, 
SingleFileOpenOptions, SnapshotParseMode, SyncMode, + close_single_file, close_single_file_with_options, open_single_file, SingleFileCloseOptions, + SingleFileOpenOptions, SnapshotParseMode, SyncMode, }; pub use transaction::SingleFileTxGuard; @@ -149,6 +152,8 @@ pub struct SingleFileDB { /// Vector stores keyed by property key ID /// Each property key can have its own vector store with different dimensions pub(crate) vector_stores: RwLock>, + /// Lazy vector-store section index keyed by property key ID + pub(crate) vector_store_lazy_entries: RwLock>, /// Cache manager for property, traversal, query, and key caches pub(crate) cache: RwLock>, @@ -164,6 +169,11 @@ pub struct SingleFileDB { /// Group commit window in milliseconds pub(crate) group_commit_window_ms: u64, + /// Primary replication runtime (enabled only when role=primary) + pub(crate) primary_replication: Option, + /// Replica replication runtime (enabled only when role=replica) + pub(crate) replica_replication: Option, + #[cfg(feature = "bench-profile")] pub(crate) commit_lock_wait_ns: AtomicU64, #[cfg(feature = "bench-profile")] diff --git a/ray-rs/src/core/single_file/open.rs b/ray-rs/src/core/single_file/open.rs index 44cc4d4..d7f6137 100644 --- a/ray-rs/src/core/single_file/open.rs +++ b/ray-rs/src/core/single_file/open.rs @@ -3,8 +3,10 @@ //! Handles opening, creating, and closing single-file databases. 
use std::collections::HashMap; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicU32, AtomicU64, AtomicUsize, Ordering}; +#[cfg(feature = "bench-profile")] +use std::time::Instant; use parking_lot::{Mutex, RwLock}; @@ -15,6 +17,9 @@ use crate::core::snapshot::reader::SnapshotData; use crate::core::wal::buffer::WalBuffer; use crate::error::{KiteError, Result}; use crate::mvcc::{GcConfig, MvccManager}; +use crate::replication::primary::PrimaryReplication; +use crate::replication::replica::ReplicaReplication; +use crate::replication::types::ReplicationRole; use crate::types::*; use crate::util::compression::CompressionOptions; use crate::util::mmap::map_file; @@ -22,7 +27,7 @@ use crate::vector::store::{create_vector_store, vector_store_delete, vector_stor use crate::vector::types::VectorStoreConfig; use super::recovery::{committed_transactions, replay_wal_record, scan_wal_records}; -use super::vector::vector_stores_from_snapshot; +use super::vector::{materialize_vector_store_from_lazy_entries, vector_store_state_from_snapshot}; use super::{CheckpointStatus, SingleFileDB}; // ============================================================================ @@ -97,6 +102,22 @@ pub struct SingleFileOpenOptions { pub group_commit_window_ms: u64, /// Snapshot parse behavior (default: Strict) pub snapshot_parse_mode: SnapshotParseMode, + /// Replication role (default: Disabled) + pub replication_role: ReplicationRole, + /// Optional replication sidecar path (defaults to derived from DB path) + pub replication_sidecar_path: Option, + /// Source primary db path (replica role only) + pub replication_source_db_path: Option, + /// Source primary sidecar path override (replica role only) + pub replication_source_sidecar_path: Option, + /// Fault injection for tests: fail append once `n` successful appends reached + pub replication_fail_after_append_for_testing: Option, + /// Rotate replication segments when active segment reaches/exceeds this size + 
pub replication_segment_max_bytes: Option, + /// Retain at least this many entries when pruning old segments + pub replication_retention_min_entries: Option, + /// Retain segments newer than this many milliseconds (primary role only) + pub replication_retention_min_ms: Option, } impl Default for SingleFileOpenOptions { @@ -122,6 +143,14 @@ impl Default for SingleFileOpenOptions { group_commit_enabled: false, group_commit_window_ms: 2, snapshot_parse_mode: SnapshotParseMode::Strict, + replication_role: ReplicationRole::Disabled, + replication_sidecar_path: None, + replication_source_db_path: None, + replication_source_sidecar_path: None, + replication_fail_after_append_for_testing: None, + replication_segment_max_bytes: None, + replication_retention_min_entries: None, + replication_retention_min_ms: None, } } } @@ -245,6 +274,73 @@ impl SingleFileOpenOptions { self.snapshot_parse_mode = mode; self } + + /// Set replication role (disabled | primary | replica) + pub fn replication_role(mut self, role: ReplicationRole) -> Self { + self.replication_role = role; + self + } + + /// Set replication sidecar path (for primary/replica modes) + pub fn replication_sidecar_path>(mut self, path: P) -> Self { + self.replication_sidecar_path = Some(path.as_ref().to_path_buf()); + self + } + + /// Set replication source db path (replica role only) + pub fn replication_source_db_path>(mut self, path: P) -> Self { + self.replication_source_db_path = Some(path.as_ref().to_path_buf()); + self + } + + /// Set replication source sidecar path (replica role only) + pub fn replication_source_sidecar_path>(mut self, path: P) -> Self { + self.replication_source_sidecar_path = Some(path.as_ref().to_path_buf()); + self + } + + /// Test-only fault injection for append failures. 
+ pub fn replication_fail_after_append_for_testing(mut self, value: u64) -> Self { + self.replication_fail_after_append_for_testing = Some(value); + self + } + + /// Set replication segment rotation threshold in bytes (primary role only) + pub fn replication_segment_max_bytes(mut self, value: u64) -> Self { + self.replication_segment_max_bytes = Some(value); + self + } + + /// Set retention minimum entries to keep when pruning (primary role only) + pub fn replication_retention_min_entries(mut self, value: u64) -> Self { + self.replication_retention_min_entries = Some(value); + self + } + + /// Set retention minimum time window in milliseconds (primary role only) + pub fn replication_retention_min_ms(mut self, value: u64) -> Self { + self.replication_retention_min_ms = Some(value); + self + } +} + +/// Options for closing a single-file database. +#[derive(Debug, Clone, Copy, Default)] +pub struct SingleFileCloseOptions { + /// If set, run a blocking checkpoint before close when WAL usage >= threshold. + /// Threshold is clamped to [0.0, 1.0]. 
+ pub checkpoint_if_wal_usage_at_least: Option, +} + +impl SingleFileCloseOptions { + pub fn new() -> Self { + Self::default() + } + + pub fn checkpoint_if_wal_usage_at_least(mut self, threshold: f64) -> Self { + self.checkpoint_if_wal_usage_at_least = Some(threshold); + self + } } struct SnapshotLoadState<'a> { @@ -261,6 +357,32 @@ struct SnapshotLoadState<'a> { next_label_id: &'a mut LabelId, next_etype_id: &'a mut ETypeId, next_propkey_id: &'a mut PropKeyId, + #[cfg(feature = "bench-profile")] + profile: &'a mut OpenProfileCounters, + #[cfg(feature = "bench-profile")] + profile_enabled: bool, +} + +#[cfg(feature = "bench-profile")] +#[derive(Debug, Default)] +struct OpenProfileCounters { + snapshot_parse_ns: u64, + snapshot_crc_ns: u64, + snapshot_decode_ns: u64, + schema_hydrate_ns: u64, + wal_scan_ns: u64, + wal_replay_ns: u64, + vector_init_ns: u64, +} + +#[cfg(feature = "bench-profile")] +fn elapsed_ns(started: Instant) -> u64 { + started.elapsed().as_nanos().min(u128::from(u64::MAX)) as u64 +} + +#[cfg(feature = "bench-profile")] +fn open_profile_enabled() -> bool { + std::env::var_os("KITEDB_BENCH_PROFILE_OPEN").is_some() } fn load_snapshot_and_schema(state: &mut SnapshotLoadState<'_>) -> Result> { @@ -278,17 +400,53 @@ fn load_snapshot_and_schema(state: &mut SnapshotLoadState<'_>) -> Result { + #[cfg(feature = "bench-profile")] + let schema_started = Instant::now(); // Load schema from snapshot for i in 1..=snap.header.num_labels as u32 { if let Some(name) = snap.label_name(i) { @@ -314,6 +472,13 @@ fn load_snapshot_and_schema(state: &mut SnapshotLoadState<'_>) -> Result>( options: SingleFileOpenOptions, ) -> Result { let path = path.as_ref(); + #[cfg(feature = "bench-profile")] + let open_started = Instant::now(); + #[cfg(feature = "bench-profile")] + let mut open_profile = OpenProfileCounters::default(); + #[cfg(feature = "bench-profile")] + let profile_enabled = open_profile_enabled(); // Validate page size if !is_valid_page_size(options.page_size) { 
@@ -686,17 +857,31 @@ pub fn open_single_file>( next_label_id: &mut next_label_id, next_etype_id: &mut next_etype_id, next_propkey_id: &mut next_propkey_id, + #[cfg(feature = "bench-profile")] + profile: &mut open_profile, + #[cfg(feature = "bench-profile")] + profile_enabled, }; let snapshot = load_snapshot_and_schema(&mut snapshot_state)?; // Replay WAL for recovery (if not a new database) let mut _wal_records_storage: Option>; if !is_new && header.wal_head > 0 { + #[cfg(feature = "bench-profile")] + let wal_scan_started = Instant::now(); _wal_records_storage = Some(scan_wal_records(&mut pager, &header)?); + #[cfg(feature = "bench-profile")] + { + open_profile.wal_scan_ns = open_profile + .wal_scan_ns + .saturating_add(elapsed_ns(wal_scan_started)); + } if let Some(ref wal_records) = _wal_records_storage { committed_in_order = committed_transactions(wal_records); // Replay committed transactions + #[cfg(feature = "bench-profile")] + let wal_replay_started = Instant::now(); for (_txid, records) in &committed_in_order { for record in records { replay_wal_record( @@ -717,20 +902,47 @@ pub fn open_single_file>( } next_commit_ts += 1; } + #[cfg(feature = "bench-profile")] + { + open_profile.wal_replay_ns = open_profile + .wal_replay_ns + .saturating_add(elapsed_ns(wal_replay_started)); + } } } else { _wal_records_storage = None; } - // Load vector stores from snapshot (if present) - let mut vector_stores = if let Some(ref snapshot) = snapshot { - vector_stores_from_snapshot(snapshot)? + // Load vector-store state from snapshot (if present). + // Newer snapshots keep stores lazy until first access. + #[cfg(feature = "bench-profile")] + let vector_init_started = Instant::now(); + let (mut vector_stores, mut vector_store_lazy_entries) = if let Some(ref snapshot) = snapshot { + if snapshot + .header + .flags + .contains(SnapshotFlags::HAS_VECTOR_STORES) + || snapshot.header.flags.contains(SnapshotFlags::HAS_VECTORS) + { + vector_store_state_from_snapshot(snapshot)? 
+ } else { + (HashMap::new(), HashMap::new()) + } } else { - HashMap::new() + (HashMap::new(), HashMap::new()) }; // Apply pending vector operations from WAL replay for ((node_id, prop_key_id), operation) in delta.pending_vectors.drain() { + if let Some(ref snapshot) = snapshot { + materialize_vector_store_from_lazy_entries( + snapshot, + &mut vector_stores, + &mut vector_store_lazy_entries, + prop_key_id, + )?; + } + match operation { Some(vector) => { // Get or create vector store @@ -752,6 +964,12 @@ pub fn open_single_file>( } } } + #[cfg(feature = "bench-profile")] + { + open_profile.vector_init_ns = open_profile + .vector_init_ns + .saturating_add(elapsed_ns(vector_init_started)); + } // Initialize cache if enabled let cache = options.cache.clone().map(CacheManager::new); @@ -765,6 +983,56 @@ pub fn open_single_file>( &delta, ); + let (primary_replication, replica_replication) = match options.replication_role { + ReplicationRole::Disabled => (None, None), + ReplicationRole::Primary => ( + Some(PrimaryReplication::open( + path, + options.replication_sidecar_path.clone(), + options.replication_segment_max_bytes, + options.replication_retention_min_entries, + options.replication_retention_min_ms, + options.sync_mode, + options.replication_fail_after_append_for_testing, + )?), + None, + ), + ReplicationRole::Replica => ( + None, + Some(ReplicaReplication::open( + path, + options.replication_sidecar_path.clone(), + options.replication_source_db_path.clone(), + options.replication_source_sidecar_path.clone(), + )?), + ), + }; + + #[cfg(feature = "bench-profile")] + { + if profile_enabled { + let total_ns = elapsed_ns(open_started); + let wal_records = _wal_records_storage.as_ref().map(|r| r.len()).unwrap_or(0); + eprintln!( + "[bench-profile][open] path={} total_ns={} snapshot_parse_ns={} snapshot_crc_ns={} snapshot_decode_ns={} schema_hydrate_ns={} wal_scan_ns={} wal_replay_ns={} vector_init_ns={} snapshot_loaded={} wal_records={} wal_txs={} vector_stores={} 
vector_lazy_entries={}", + path.display(), + total_ns, + open_profile.snapshot_parse_ns, + open_profile.snapshot_crc_ns, + open_profile.snapshot_decode_ns, + open_profile.schema_hydrate_ns, + open_profile.wal_scan_ns, + open_profile.wal_replay_ns, + open_profile.vector_init_ns, + usize::from(snapshot.is_some()), + wal_records, + committed_in_order.len(), + vector_stores.len(), + vector_store_lazy_entries.len(), + ); + } + } + Ok(SingleFileDB { path: path.to_path_buf(), read_only: options.read_only, @@ -795,11 +1063,14 @@ pub fn open_single_file>( background_checkpoint: options.background_checkpoint, checkpoint_status: Mutex::new(CheckpointStatus::Idle), vector_stores: RwLock::new(vector_stores), + vector_store_lazy_entries: RwLock::new(vector_store_lazy_entries), cache: RwLock::new(cache), checkpoint_compression: options.checkpoint_compression.clone(), sync_mode: options.sync_mode, group_commit_enabled: options.group_commit_enabled, group_commit_window_ms: options.group_commit_window_ms, + primary_replication, + replica_replication, #[cfg(feature = "bench-profile")] commit_lock_wait_ns: AtomicU64::new(0), #[cfg(feature = "bench-profile")] @@ -807,8 +1078,24 @@ pub fn open_single_file>( }) } -/// Close a single-file database -pub fn close_single_file(db: SingleFileDB) -> Result<()> { +/// Close a single-file database using custom close options. 
+pub fn close_single_file_with_options( + db: SingleFileDB, + options: SingleFileCloseOptions, +) -> Result<()> { + if let Some(threshold_raw) = options.checkpoint_if_wal_usage_at_least { + if !threshold_raw.is_finite() { + return Err(KiteError::Internal(format!( + "invalid close checkpoint threshold: {threshold_raw}" + ))); + } + + let threshold = threshold_raw.clamp(0.0, 1.0); + if !db.read_only && db.should_checkpoint(threshold) { + db.checkpoint()?; + } + } + if let Some(ref mvcc) = db.mvcc { mvcc.stop(); } @@ -838,11 +1125,18 @@ pub fn close_single_file(db: SingleFileDB) -> Result<()> { Ok(()) } +/// Close a single-file database with default close behavior. +pub fn close_single_file(db: SingleFileDB) -> Result<()> { + close_single_file_with_options(db, SingleFileCloseOptions::default()) +} + #[cfg(test)] mod tests { use super::*; - use crate::core::single_file::close_single_file; use crate::core::single_file::recovery::read_wal_area; + use crate::core::single_file::{ + close_single_file, close_single_file_with_options, SingleFileCloseOptions, + }; use crate::core::wal::record::parse_wal_record; use crate::util::binary::{align_up, read_u32}; use tempfile::tempdir; @@ -1193,4 +1487,68 @@ mod tests { assert!(db.node_by_key("n2").is_some()); close_single_file(db).expect("expected value"); } + + #[test] + fn test_close_with_checkpoint_if_wal_over_clears_wal() { + let temp_dir = tempdir().expect("expected value"); + let db_path = temp_dir.path().join("close-with-checkpoint.kitedb"); + + let db = open_single_file( + &db_path, + SingleFileOpenOptions::new().auto_checkpoint(false), + ) + .expect("expected value"); + + db.begin(false).expect("expected value"); + let _ = db.create_node(Some("n1")).expect("expected value"); + db.commit().expect("expected value"); + assert!(db.should_checkpoint(0.0)); + + close_single_file_with_options( + db, + SingleFileCloseOptions::new().checkpoint_if_wal_usage_at_least(0.0), + ) + .expect("expected value"); + + let reopened = 
open_single_file( + &db_path, + SingleFileOpenOptions::new().auto_checkpoint(false), + ) + .expect("expected value"); + let header = reopened.header.read().clone(); + assert_eq!(header.wal_head, 0); + assert_eq!(header.wal_tail, 0); + close_single_file(reopened).expect("expected value"); + } + + #[test] + fn test_close_with_high_threshold_keeps_wal() { + let temp_dir = tempdir().expect("expected value"); + let db_path = temp_dir.path().join("close-without-checkpoint.kitedb"); + + let db = open_single_file( + &db_path, + SingleFileOpenOptions::new().auto_checkpoint(false), + ) + .expect("expected value"); + + db.begin(false).expect("expected value"); + let _ = db.create_node(Some("n1")).expect("expected value"); + db.commit().expect("expected value"); + + close_single_file_with_options( + db, + SingleFileCloseOptions::new().checkpoint_if_wal_usage_at_least(1.0), + ) + .expect("expected value"); + + let reopened = open_single_file( + &db_path, + SingleFileOpenOptions::new().auto_checkpoint(false), + ) + .expect("expected value"); + let header = reopened.header.read().clone(); + assert!(header.wal_head > 0); + close_single_file(reopened).expect("expected value"); + } } diff --git a/ray-rs/src/core/single_file/replication.rs b/ray-rs/src/core/single_file/replication.rs new file mode 100644 index 0000000..8147135 --- /dev/null +++ b/ray-rs/src/core/single_file/replication.rs @@ -0,0 +1,1203 @@ +//! Replica-side operations and token wait helpers. 
+ +use crate::core::wal::record::{ + parse_add_edge_payload, parse_add_edge_props_payload, parse_add_edges_batch_payload, + parse_add_edges_props_batch_payload, parse_add_node_label_payload, parse_create_node_payload, + parse_create_nodes_batch_payload, parse_del_edge_prop_payload, parse_del_node_prop_payload, + parse_del_node_vector_payload, parse_delete_edge_payload, parse_delete_node_payload, + parse_remove_node_label_payload, parse_set_edge_prop_payload, parse_set_edge_props_payload, + parse_set_node_prop_payload, parse_set_node_vector_payload, parse_wal_record, ParsedWalRecord, +}; +use crate::error::{KiteError, Result}; +use crate::replication::manifest::ManifestStore; +use crate::replication::primary::PrimaryRetentionOutcome; +use crate::replication::replica::ReplicaReplicationStatus; +use crate::replication::transport::decode_commit_frame_payload; +use crate::replication::types::{CommitToken, ReplicationCursor, ReplicationRole}; +use crate::types::WalRecordType; +use crate::util::crc::{crc32c, Crc32cHasher}; +use base64::engine::general_purpose::STANDARD as BASE64_STANDARD; +use base64::Engine; +use serde_json::json; +use std::collections::HashSet; +use std::fs::File; +use std::io::{BufReader, Read, Write}; +use std::path::Path; +use std::str::FromStr; +use std::time::{Duration, Instant}; + +use super::{close_single_file, open_single_file, SingleFileDB, SingleFileOpenOptions}; + +const REPLICATION_MANIFEST_FILE: &str = "manifest.json"; +const REPLICATION_FRAME_MAGIC: u32 = 0x474F_4C52; +const REPLICATION_FRAME_VERSION: u16 = 1; +const REPLICATION_FRAME_FLAG_CRC32_DISABLED: u16 = 0x0001; +const REPLICATION_FRAME_HEADER_BYTES: usize = 32; +const REPLICATION_MAX_FRAME_PAYLOAD_BYTES: usize = 64 * 1024 * 1024; +const REPLICATION_IO_CHUNK_BYTES: usize = 64 * 1024; +const REPLICATION_SNAPSHOT_INLINE_MAX_BYTES: u64 = 32 * 1024 * 1024; +const REPLICA_CATCH_UP_MAX_ATTEMPTS: usize = 5; +const REPLICA_CATCH_UP_INITIAL_BACKOFF_MS: u64 = 10; +const 
REPLICA_CATCH_UP_MAX_BACKOFF_MS: u64 = 160; +const REPLICA_BOOTSTRAP_MAX_ATTEMPTS: usize = 20; +const REPLICA_BOOTSTRAP_INITIAL_BACKOFF_MS: u64 = 10; +const REPLICA_BOOTSTRAP_MAX_BACKOFF_MS: u64 = 320; + +impl SingleFileDB { + /// Promote this primary instance to the next replication epoch. + pub fn primary_promote_to_next_epoch(&self) -> Result { + self + .primary_replication + .as_ref() + .ok_or_else(|| { + KiteError::InvalidReplication("database is not opened in primary role".to_string()) + })? + .promote_to_next_epoch() + } + + /// Report a replica's applied cursor to drive retention decisions. + pub fn primary_report_replica_progress( + &self, + replica_id: &str, + epoch: u64, + applied_log_index: u64, + ) -> Result<()> { + self + .primary_replication + .as_ref() + .ok_or_else(|| { + KiteError::InvalidReplication("database is not opened in primary role".to_string()) + })? + .report_replica_progress(replica_id, epoch, applied_log_index) + } + + /// Run retention pruning on primary replication segments. + pub fn primary_run_retention(&self) -> Result { + self + .primary_replication + .as_ref() + .ok_or_else(|| { + KiteError::InvalidReplication("database is not opened in primary role".to_string()) + })? + .run_retention() + } + + /// Replica status surface. + pub fn replica_replication_status(&self) -> Option { + self + .replica_replication + .as_ref() + .map(|replication| replication.status()) + } + + /// Bootstrap replica state from source primary snapshot. 
+ pub fn replica_bootstrap_from_snapshot(&self) -> Result<()> { + let runtime = self.replica_replication.as_ref().ok_or_else(|| { + KiteError::InvalidReplication("database is not opened in replica role".to_string()) + })?; + + let source_db_path = runtime.source_db_path().ok_or_else(|| { + KiteError::InvalidReplication("replica source db path is not configured".to_string()) + })?; + + let mut attempts = 0usize; + let mut backoff_ms = REPLICA_BOOTSTRAP_INITIAL_BACKOFF_MS; + loop { + attempts = attempts.saturating_add(1); + let source = open_single_file( + &source_db_path, + SingleFileOpenOptions::new() + .read_only(true) + .create_if_missing(false) + .replication_role(ReplicationRole::Disabled), + )?; + + let bootstrap_start = runtime.source_head_position()?; + let bootstrap_source_fingerprint = source_db_fingerprint(&source_db_path)?; + let sync_result = (|| { + std::thread::sleep(Duration::from_millis(10)); + let quiesce_head = runtime.source_head_position()?; + let quiesce_fingerprint = source_db_fingerprint(&source_db_path)?; + if quiesce_head != bootstrap_start || quiesce_fingerprint != bootstrap_source_fingerprint { + return Err(KiteError::InvalidReplication(format!( + "source primary did not quiesce for snapshot bootstrap; start={}:{}, observed={}:{}, start_crc={:08x}, observed_crc={:08x}; quiesce writes and retry", + bootstrap_start.0, + bootstrap_start.1, + quiesce_head.0, + quiesce_head.1, + bootstrap_source_fingerprint.1, + quiesce_fingerprint.1 + ))); + } + sync_graph_state(self, &source, || { + let bootstrap_end = runtime.source_head_position()?; + let bootstrap_end_fingerprint = source_db_fingerprint(&source_db_path)?; + if bootstrap_end != bootstrap_start + || bootstrap_end_fingerprint != bootstrap_source_fingerprint + { + return Err(KiteError::InvalidReplication(format!( + "source primary advanced during snapshot bootstrap; start={}:{}, end={}:{}, start_crc={:08x}, end_crc={:08x}; quiesce writes and retry", + bootstrap_start.0, + bootstrap_start.1, + 
bootstrap_end.0, + bootstrap_end.1, + bootstrap_source_fingerprint.1, + bootstrap_end_fingerprint.1 + ))); + } + std::thread::sleep(Duration::from_millis(10)); + let quiesce_head = runtime.source_head_position()?; + let quiesce_fingerprint = source_db_fingerprint(&source_db_path)?; + if quiesce_head != bootstrap_start || quiesce_fingerprint != bootstrap_source_fingerprint { + return Err(KiteError::InvalidReplication(format!( + "source primary did not quiesce for snapshot bootstrap; start={}:{}, observed={}:{}, start_crc={:08x}, observed_crc={:08x}; quiesce writes and retry", + bootstrap_start.0, + bootstrap_start.1, + quiesce_head.0, + quiesce_head.1, + bootstrap_source_fingerprint.1, + quiesce_fingerprint.1 + ))); + } + Ok(()) + }) + })() + .and_then(|_| { + runtime.mark_applied(bootstrap_start.0, bootstrap_start.1)?; + runtime.clear_error() + }); + + let close_result = close_single_file(source); + if let Err(error) = sync_result { + if is_bootstrap_quiesce_error(&error) && attempts < REPLICA_BOOTSTRAP_MAX_ATTEMPTS { + std::thread::sleep(Duration::from_millis(backoff_ms)); + backoff_ms = backoff_ms + .saturating_mul(2) + .min(REPLICA_BOOTSTRAP_MAX_BACKOFF_MS); + continue; + } + let _ = runtime.mark_error(error.to_string(), false); + return Err(error); + } + close_result?; + return Ok(()); + } + } + + /// Force snapshot reseed for replicas that lost log continuity. + pub fn replica_reseed_from_snapshot(&self) -> Result<()> { + self.replica_bootstrap_from_snapshot() + } + + /// Pull and apply the next batch of replication frames. + pub fn replica_catch_up_once(&self, max_frames: usize) -> Result { + self.replica_catch_up_internal(max_frames, false) + } + + /// Test helper: request a batch including last-applied frame to verify idempotency. + pub fn replica_catch_up_once_replaying_last_for_testing( + &self, + max_frames: usize, + ) -> Result { + self.replica_catch_up_internal(max_frames, true) + } + + /// Wait until this DB has applied at least the given token. 
+ pub fn wait_for_token(&self, token: CommitToken, timeout_ms: u64) -> Result { + let deadline = Instant::now() + Duration::from_millis(timeout_ms); + + loop { + if self.has_token(token) { + return Ok(true); + } + + if Instant::now() >= deadline { + return Ok(false); + } + + std::thread::sleep(Duration::from_millis(10)); + } + } + + fn has_token(&self, token: CommitToken) -> bool { + if let Some(status) = self.primary_replication_status() { + if let Some(last_token) = status.last_token { + return last_token >= token; + } + } + + if let Some(status) = self.replica_replication_status() { + let replica_token = CommitToken::new(status.applied_epoch, status.applied_log_index); + return replica_token >= token; + } + + false + } + + fn replica_catch_up_internal(&self, max_frames: usize, replay_last: bool) -> Result { + let runtime = self.replica_replication.as_ref().ok_or_else(|| { + KiteError::InvalidReplication("database is not opened in replica role".to_string()) + })?; + + let mut attempts = 0usize; + let mut backoff_ms = REPLICA_CATCH_UP_INITIAL_BACKOFF_MS; + loop { + attempts = attempts.saturating_add(1); + match self.replica_catch_up_attempt(runtime, max_frames.max(1), replay_last) { + Ok(applied) => return Ok(applied), + Err(error) => { + let needs_reseed = runtime.status().needs_reseed || is_reseed_error(&error); + if needs_reseed { + return Err(error); + } + + if attempts >= REPLICA_CATCH_UP_MAX_ATTEMPTS { + let _ = runtime.mark_error(error.to_string(), false); + return Err(error); + } + + std::thread::sleep(Duration::from_millis(backoff_ms)); + backoff_ms = backoff_ms + .saturating_mul(2) + .min(REPLICA_CATCH_UP_MAX_BACKOFF_MS); + } + } + } + } + + fn replica_catch_up_attempt( + &self, + runtime: &crate::replication::replica::ReplicaReplication, + max_frames: usize, + replay_last: bool, + ) -> Result { + let frames = runtime.frames_after(max_frames, replay_last)?; + if frames.is_empty() { + runtime.clear_error()?; + return Ok(0); + } + + let (mut applied_epoch, 
mut applied_log_index) = runtime.applied_position(); + let mut applied = 0usize; + for frame in frames { + let already_applied = applied_epoch > frame.epoch + || (applied_epoch == frame.epoch && applied_log_index >= frame.log_index); + if already_applied { + continue; + } + + if let Err(error) = apply_replication_frame(self, &frame.payload) { + if applied > 0 { + let _ = runtime.mark_applied(applied_epoch, applied_log_index); + } + return Err(KiteError::InvalidReplication(format!( + "replica apply failed at {}:{}: {error}", + frame.epoch, frame.log_index + ))); + } + + applied_epoch = frame.epoch; + applied_log_index = frame.log_index; + applied = applied.saturating_add(1); + } + + if applied > 0 { + runtime + .mark_applied(applied_epoch, applied_log_index) + .map_err(|error| { + KiteError::InvalidReplication(format!( + "replica cursor persist failed at {}:{}: {error}", + applied_epoch, applied_log_index + )) + })?; + } + + runtime.clear_error()?; + Ok(applied) + } + + /// Export latest primary snapshot metadata and optional bytes as transport JSON. 
+ pub fn primary_export_snapshot_transport_json(&self, include_data: bool) -> Result { + let status = self.primary_replication_status().ok_or_else(|| { + KiteError::InvalidReplication("database is not opened in primary role".to_string()) + })?; + let (byte_length, checksum_crc32c, data_base64) = + read_snapshot_transport_payload(&self.path, include_data)?; + let generated_at_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64; + + let payload = json!({ + "format": "single-file-db-copy", + "db_path": self.path.to_string_lossy().to_string(), + "byte_length": byte_length, + "checksum_crc32c": checksum_crc32c, + "generated_at_ms": generated_at_ms, + "epoch": status.epoch, + "head_log_index": status.head_log_index, + "retained_floor": status.retained_floor, + "start_cursor": ReplicationCursor::new(status.epoch, 0, 0, status.retained_floor).to_string(), + "data_base64": data_base64, + }); + + serde_json::to_string(&payload).map_err(|error| { + KiteError::Serialization(format!("encode replication snapshot export: {error}")) + }) + } + + /// Export primary replication log frames with cursor paging as transport JSON. 
+ pub fn primary_export_log_transport_json( + &self, + cursor: Option<&str>, + max_frames: usize, + max_bytes: usize, + include_payload: bool, + ) -> Result { + if max_frames == 0 { + return Err(KiteError::InvalidQuery("max_frames must be > 0".into())); + } + if max_bytes == 0 { + return Err(KiteError::InvalidQuery("max_bytes must be > 0".into())); + } + + let primary_replication = self.primary_replication.as_ref().ok_or_else(|| { + KiteError::InvalidReplication("database is not opened in primary role".to_string()) + })?; + primary_replication.flush_for_transport_export()?; + let status = primary_replication.status(); + let sidecar_path = status.sidecar_path; + let manifest = ManifestStore::new(sidecar_path.join(REPLICATION_MANIFEST_FILE)).read()?; + let parsed_cursor = match cursor { + Some(raw) if !raw.trim().is_empty() => Some( + ReplicationCursor::from_str(raw) + .map_err(|error| KiteError::InvalidReplication(format!("invalid cursor: {error}")))?, + ), + _ => None, + }; + + let mut segments = manifest.segments.clone(); + segments.sort_by_key(|segment| segment.id); + + let mut frames = Vec::new(); + let mut total_bytes = 0usize; + let mut next_cursor: Option = None; + let mut limited = false; + + 'outer: for segment in segments { + let segment_path = sidecar_path.join(format_segment_file_name(segment.id)); + if !segment_path.exists() { + continue; + } + + let mut reader = BufReader::new(File::open(&segment_path)?); + let mut offset = 0u64; + loop { + let Some(header) = read_frame_header(&mut reader, segment.id, offset)? 
else { + break; + }; + + let frame_offset = offset; + let frame_bytes = REPLICATION_FRAME_HEADER_BYTES + .checked_add(header.payload_len) + .ok_or_else(|| { + KiteError::InvalidReplication("replication frame payload overflow".to_string()) + })?; + let payload_end = frame_offset + .checked_add(frame_bytes as u64) + .ok_or_else(|| { + KiteError::InvalidReplication("replication frame payload overflow".to_string()) + })?; + + let include_frame = frame_after_cursor( + parsed_cursor, + header.epoch, + segment.id, + frame_offset, + header.log_index, + ); + if include_frame { + if frame_bytes > max_bytes { + return Err(KiteError::InvalidQuery( + format!("max_bytes budget {max_bytes} is smaller than frame size {frame_bytes}") + .into(), + )); + } + if frames.len() >= max_frames || total_bytes.saturating_add(frame_bytes) > max_bytes { + limited = true; + break 'outer; + } + } + + let payload_base64 = read_frame_payload( + &mut reader, + segment.id, + frame_offset, + &header, + include_payload && include_frame, + )?; + + if include_frame { + next_cursor = Some( + ReplicationCursor::new(header.epoch, segment.id, payload_end, header.log_index) + .to_string(), + ); + frames.push(json!({ + "epoch": header.epoch, + "log_index": header.log_index, + "segment_id": segment.id, + "segment_offset": frame_offset, + "bytes": frame_bytes, + "payload_base64": payload_base64, + })); + total_bytes = total_bytes.saturating_add(frame_bytes); + } + + offset = payload_end; + } + } + + let payload = json!({ + "epoch": manifest.epoch, + "head_log_index": manifest.head_log_index, + "retained_floor": manifest.retained_floor, + "cursor": parsed_cursor.map(|value| value.to_string()), + "next_cursor": next_cursor, + "eof": !limited, + "frame_count": frames.len(), + "total_bytes": total_bytes, + "frames": frames, + }); + + serde_json::to_string(&payload) + .map_err(|error| KiteError::Serialization(format!("encode replication log export: {error}"))) + } +} + +fn is_reseed_error(error: &KiteError) -> bool 
{ + matches!( + error, + KiteError::InvalidReplication(message) if message.to_ascii_lowercase().contains("reseed") + ) +} + +fn is_bootstrap_quiesce_error(error: &KiteError) -> bool { + match error { + KiteError::InvalidReplication(message) => { + message.contains("source primary advanced during snapshot bootstrap") + || message.contains("source primary did not quiesce for snapshot bootstrap") + } + _ => false, + } +} + +fn read_snapshot_transport_payload( + path: &Path, + include_data: bool, +) -> Result<(u64, String, Option)> { + let metadata = std::fs::metadata(path)?; + if include_data && metadata.len() > REPLICATION_SNAPSHOT_INLINE_MAX_BYTES { + return Err(KiteError::InvalidReplication(format!( + "snapshot size {} exceeds max inline payload {} bytes", + metadata.len(), + REPLICATION_SNAPSHOT_INLINE_MAX_BYTES + ))); + } + + let mut reader = BufReader::new(File::open(path)?); + let mut hasher = Crc32cHasher::new(); + let mut bytes_read = 0u64; + let mut chunk = [0u8; REPLICATION_IO_CHUNK_BYTES]; + + if include_data { + let mut encoder = base64::write::EncoderWriter::new(Vec::new(), &BASE64_STANDARD); + loop { + let read = reader.read(&mut chunk)?; + if read == 0 { + break; + } + + let payload = &chunk[..read]; + bytes_read = bytes_read.saturating_add(read as u64); + if bytes_read > REPLICATION_SNAPSHOT_INLINE_MAX_BYTES { + return Err(KiteError::InvalidReplication(format!( + "snapshot size {} exceeds max inline payload {} bytes", + bytes_read, REPLICATION_SNAPSHOT_INLINE_MAX_BYTES + ))); + } + hasher.update(payload); + encoder.write_all(payload)?; + } + + let encoded = String::from_utf8(encoder.finish()?).map_err(|error| { + KiteError::Serialization(format!("snapshot base64 encoding failed: {error}")) + })?; + return Ok(( + bytes_read, + format!("{:08x}", hasher.finalize()), + Some(encoded), + )); + } + + loop { + let read = reader.read(&mut chunk)?; + if read == 0 { + break; + } + bytes_read = bytes_read.saturating_add(read as u64); + 
hasher.update(&chunk[..read]); + } + + Ok((bytes_read, format!("{:08x}", hasher.finalize()), None)) +} + +fn frame_after_cursor( + cursor: Option, + epoch: u64, + segment_id: u64, + segment_offset: u64, + log_index: u64, +) -> bool { + match cursor { + None => true, + Some(cursor) => { + (epoch, log_index, segment_id, segment_offset) + > ( + cursor.epoch, + cursor.log_index, + cursor.segment_id, + cursor.segment_offset, + ) + } + } +} + +fn le_u32(bytes: &[u8]) -> Result { + let value: [u8; 4] = bytes + .try_into() + .map_err(|_| KiteError::InvalidReplication("invalid frame u32 field".to_string()))?; + Ok(u32::from_le_bytes(value)) +} + +fn le_u16(bytes: &[u8]) -> Result { + let value: [u8; 2] = bytes + .try_into() + .map_err(|_| KiteError::InvalidReplication("invalid frame u16 field".to_string()))?; + Ok(u16::from_le_bytes(value)) +} + +fn le_u64(bytes: &[u8]) -> Result { + let value: [u8; 8] = bytes + .try_into() + .map_err(|_| KiteError::InvalidReplication("invalid frame u64 field".to_string()))?; + Ok(u64::from_le_bytes(value)) +} + +fn format_segment_file_name(id: u64) -> String { + format!("segment-{id:020}.rlog") +} + +#[derive(Debug, Clone, Copy)] +struct ParsedFrameHeader { + epoch: u64, + log_index: u64, + payload_len: usize, + stored_crc32: u32, + crc_disabled: bool, +} + +fn read_frame_header( + reader: &mut BufReader, + segment_id: u64, + frame_offset: u64, +) -> Result> { + let mut header_bytes = [0u8; REPLICATION_FRAME_HEADER_BYTES]; + let mut filled = 0usize; + while filled < REPLICATION_FRAME_HEADER_BYTES { + let read = reader.read(&mut header_bytes[filled..])?; + if read == 0 { + if filled == 0 { + return Ok(None); + } + return Err(KiteError::InvalidReplication(format!( + "replication frame truncated in segment {} at byte {}", + segment_id, frame_offset + ))); + } + filled = filled.saturating_add(read); + } + + parse_frame_header(&header_bytes, segment_id, frame_offset).map(Some) +} + +fn parse_frame_header( + header_bytes: &[u8; 
REPLICATION_FRAME_HEADER_BYTES], + segment_id: u64, + frame_offset: u64, +) -> Result { + let magic = le_u32(&header_bytes[0..4])?; + if magic != REPLICATION_FRAME_MAGIC { + return Err(KiteError::InvalidReplication(format!( + "invalid replication frame magic 0x{magic:08X} in segment {} at byte {}", + segment_id, frame_offset + ))); + } + + let version = le_u16(&header_bytes[4..6])?; + if version != REPLICATION_FRAME_VERSION { + return Err(KiteError::VersionMismatch { + required: version as u32, + current: REPLICATION_FRAME_VERSION as u32, + }); + } + + let flags = le_u16(&header_bytes[6..8])?; + if flags & !REPLICATION_FRAME_FLAG_CRC32_DISABLED != 0 { + return Err(KiteError::InvalidReplication(format!( + "unsupported replication frame flags 0x{flags:04X} in segment {} at byte {}", + segment_id, frame_offset + ))); + } + + let payload_len = le_u32(&header_bytes[24..28])? as usize; + if payload_len > REPLICATION_MAX_FRAME_PAYLOAD_BYTES { + return Err(KiteError::InvalidReplication(format!( + "frame payload exceeds limit: {}", + payload_len + ))); + } + + Ok(ParsedFrameHeader { + epoch: le_u64(&header_bytes[8..16])?, + log_index: le_u64(&header_bytes[16..24])?, + payload_len, + stored_crc32: le_u32(&header_bytes[28..32])?, + crc_disabled: (flags & REPLICATION_FRAME_FLAG_CRC32_DISABLED) != 0, + }) +} + +fn read_frame_payload( + reader: &mut BufReader, + segment_id: u64, + frame_offset: u64, + header: &ParsedFrameHeader, + capture_base64: bool, +) -> Result> { + if capture_base64 { + let mut payload = vec![0u8; header.payload_len]; + reader + .read_exact(&mut payload) + .map_err(|error| map_frame_payload_read_error(error, segment_id, frame_offset))?; + if !header.crc_disabled { + let computed_crc32 = crc32c(&payload); + if computed_crc32 != header.stored_crc32 { + return Err(KiteError::CrcMismatch { + stored: header.stored_crc32, + computed: computed_crc32, + }); + } + } + return Ok(Some(BASE64_STANDARD.encode(payload))); + } + + let mut hasher = 
(!header.crc_disabled).then(Crc32cHasher::new); + consume_payload_stream(reader, header.payload_len, |chunk| { + if let Some(hasher) = hasher.as_mut() { + hasher.update(chunk); + } + }) + .map_err(|error| map_frame_payload_read_error(error, segment_id, frame_offset))?; + + if let Some(hasher) = hasher { + let computed_crc32 = hasher.finalize(); + if computed_crc32 != header.stored_crc32 { + return Err(KiteError::CrcMismatch { + stored: header.stored_crc32, + computed: computed_crc32, + }); + } + } + + Ok(None) +} + +fn consume_payload_stream( + reader: &mut BufReader, + payload_len: usize, + mut visit: impl FnMut(&[u8]), +) -> std::io::Result<()> { + let mut remaining = payload_len; + let mut chunk = [0u8; REPLICATION_IO_CHUNK_BYTES]; + while remaining > 0 { + let want = remaining.min(chunk.len()); + let read = reader.read(&mut chunk[..want])?; + if read == 0 { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "replication frame payload truncated", + )); + } + visit(&chunk[..read]); + remaining -= read; + } + Ok(()) +} + +fn map_frame_payload_read_error( + error: std::io::Error, + segment_id: u64, + frame_offset: u64, +) -> KiteError { + if error.kind() == std::io::ErrorKind::UnexpectedEof { + KiteError::InvalidReplication(format!( + "replication frame truncated in segment {} at byte {}", + segment_id, frame_offset + )) + } else { + KiteError::Io(error) + } +} + +fn source_db_fingerprint(path: &Path) -> Result<(u64, u32)> { + let mut reader = BufReader::new(File::open(path)?); + let mut hasher = Crc32cHasher::new(); + let mut chunk = [0u8; REPLICATION_IO_CHUNK_BYTES]; + let mut bytes = 0u64; + + loop { + let read = reader.read(&mut chunk)?; + if read == 0 { + break; + } + hasher.update(&chunk[..read]); + bytes = bytes.saturating_add(read as u64); + } + + Ok((bytes, hasher.finalize())) +} + +fn sync_graph_state( + replica: &SingleFileDB, + source: &SingleFileDB, + before_commit: F, +) -> Result<()> +where + F: FnOnce() -> Result<()>, +{ + let 
tx_guard = replica.begin_guard(false)?; + + let source_nodes = source.list_nodes(); + let source_node_set: HashSet<_> = source_nodes.iter().copied().collect(); + + for &node_id in &source_nodes { + let source_key = source.node_key(node_id); + if replica.node_exists(node_id) { + if replica.node_key(node_id) != source_key { + let _ = replica.delete_node(node_id)?; + replica.create_node_with_id(node_id, source_key.as_deref())?; + } + } else { + replica.create_node_with_id(node_id, source_key.as_deref())?; + } + } + + for node_id in replica.list_nodes() { + if !source_node_set.contains(&node_id) { + let _ = replica.delete_node(node_id)?; + } + } + + for &node_id in &source_nodes { + let source_props = source.node_props(node_id).unwrap_or_default(); + let replica_props = replica.node_props(node_id).unwrap_or_default(); + for (&key_id, value) in &source_props { + if replica_props.get(&key_id) != Some(value) { + replica.set_node_prop(node_id, key_id, value.clone())?; + } + } + for &key_id in replica_props.keys() { + if !source_props.contains_key(&key_id) { + replica.delete_node_prop(node_id, key_id)?; + } + } + + let source_labels: HashSet<_> = source.node_labels(node_id).into_iter().collect(); + let replica_labels: HashSet<_> = replica.node_labels(node_id).into_iter().collect(); + for &label_id in &source_labels { + if !replica_labels.contains(&label_id) { + replica.add_node_label(node_id, label_id)?; + } + } + for &label_id in &replica_labels { + if !source_labels.contains(&label_id) { + replica.remove_node_label(node_id, label_id)?; + } + } + } + + let mut vector_prop_keys = source.vector_prop_keys(); + vector_prop_keys.extend(replica.vector_prop_keys()); + for &node_id in &source_nodes { + for &prop_key_id in &vector_prop_keys { + let source_vector = source.node_vector(node_id, prop_key_id); + let replica_vector = replica.node_vector(node_id, prop_key_id); + match (source_vector, replica_vector) { + (Some(source_value), Some(replica_value)) => { + if 
source_value.as_ref() != replica_value.as_ref() { + replica.set_node_vector(node_id, prop_key_id, source_value.as_ref())?; + } + } + (Some(source_value), None) => { + replica.set_node_vector(node_id, prop_key_id, source_value.as_ref())?; + } + (None, Some(_)) => { + replica.delete_node_vector(node_id, prop_key_id)?; + } + (None, None) => {} + } + } + } + + let source_edges = source.list_edges(None); + let source_edge_set: HashSet<_> = source_edges + .iter() + .map(|edge| (edge.src, edge.etype, edge.dst)) + .collect(); + + for edge in &source_edges { + if !replica.edge_exists(edge.src, edge.etype, edge.dst) { + replica.add_edge(edge.src, edge.etype, edge.dst)?; + } + } + + for edge in replica.list_edges(None) { + if !source_edge_set.contains(&(edge.src, edge.etype, edge.dst)) { + replica.delete_edge(edge.src, edge.etype, edge.dst)?; + } + } + + for edge in source_edges { + let source_props = source + .edge_props(edge.src, edge.etype, edge.dst) + .unwrap_or_default(); + let replica_props = replica + .edge_props(edge.src, edge.etype, edge.dst) + .unwrap_or_default(); + + for (&key_id, value) in &source_props { + if replica_props.get(&key_id) != Some(value) { + replica.set_edge_prop(edge.src, edge.etype, edge.dst, key_id, value.clone())?; + } + } + for &key_id in replica_props.keys() { + if !source_props.contains_key(&key_id) { + replica.delete_edge_prop(edge.src, edge.etype, edge.dst, key_id)?; + } + } + } + + before_commit()?; + tx_guard.commit() +} + +fn apply_replication_frame(db: &SingleFileDB, payload: &[u8]) -> Result<()> { + let decoded = decode_commit_frame_payload(payload)?; + let records = parse_wal_records(&decoded.wal_bytes)?; + + if records.is_empty() { + return Ok(()); + } + + let tx_guard = db.begin_guard(false)?; + for record in &records { + apply_wal_record_idempotent(db, record)?; + } + + tx_guard.commit() +} + +fn parse_wal_records(wal_bytes: &[u8]) -> Result> { + let mut offset = 0usize; + let mut records = Vec::new(); + + while offset < 
wal_bytes.len() { + let record = parse_wal_record(wal_bytes, offset).ok_or_else(|| { + KiteError::InvalidReplication(format!( + "invalid WAL payload in replication frame at offset {offset}" + )) + })?; + + if record.record_end <= offset { + return Err(KiteError::InvalidReplication( + "non-progressing WAL record parse in replication payload".to_string(), + )); + } + + offset = record.record_end; + records.push(record); + } + + Ok(records) +} + +fn apply_wal_record_idempotent(db: &SingleFileDB, record: &ParsedWalRecord) -> Result<()> { + match record.record_type { + WalRecordType::Begin | WalRecordType::Commit | WalRecordType::Rollback => Ok(()), + WalRecordType::CreateNode => { + let data = parse_create_node_payload(&record.payload).ok_or_else(|| { + KiteError::InvalidReplication("invalid CreateNode replication payload".to_string()) + })?; + + if db.node_exists(data.node_id) { + if db.node_key(data.node_id) == data.key { + return Ok(()); + } + return Err(KiteError::InvalidReplication(format!( + "create-node replay key mismatch for node {}", + data.node_id + ))); + } + + db.create_node_with_id(data.node_id, data.key.as_deref())?; + Ok(()) + } + WalRecordType::CreateNodesBatch => { + let entries = parse_create_nodes_batch_payload(&record.payload).ok_or_else(|| { + KiteError::InvalidReplication("invalid CreateNodesBatch replication payload".to_string()) + })?; + + for entry in entries { + if db.node_exists(entry.node_id) { + if db.node_key(entry.node_id) != entry.key { + return Err(KiteError::InvalidReplication(format!( + "create-nodes-batch replay key mismatch for node {}", + entry.node_id + ))); + } + continue; + } + + db.create_node_with_id(entry.node_id, entry.key.as_deref())?; + } + + Ok(()) + } + WalRecordType::DeleteNode => { + let data = parse_delete_node_payload(&record.payload).ok_or_else(|| { + KiteError::InvalidReplication("invalid DeleteNode replication payload".to_string()) + })?; + if db.node_exists(data.node_id) { + let _ = 
db.delete_node(data.node_id)?; + } + Ok(()) + } + WalRecordType::AddEdge => { + let data = parse_add_edge_payload(&record.payload).ok_or_else(|| { + KiteError::InvalidReplication("invalid AddEdge replication payload".to_string()) + })?; + if !db.edge_exists(data.src, data.etype, data.dst) { + db.add_edge(data.src, data.etype, data.dst)?; + } + Ok(()) + } + WalRecordType::DeleteEdge => { + let data = parse_delete_edge_payload(&record.payload).ok_or_else(|| { + KiteError::InvalidReplication("invalid DeleteEdge replication payload".to_string()) + })?; + if db.edge_exists(data.src, data.etype, data.dst) { + db.delete_edge(data.src, data.etype, data.dst)?; + } + Ok(()) + } + WalRecordType::AddEdgesBatch => { + let batch = parse_add_edges_batch_payload(&record.payload).ok_or_else(|| { + KiteError::InvalidReplication("invalid AddEdgesBatch replication payload".to_string()) + })?; + + for edge in batch { + if !db.edge_exists(edge.src, edge.etype, edge.dst) { + db.add_edge(edge.src, edge.etype, edge.dst)?; + } + } + Ok(()) + } + WalRecordType::AddEdgeProps => { + let data = parse_add_edge_props_payload(&record.payload).ok_or_else(|| { + KiteError::InvalidReplication("invalid AddEdgeProps replication payload".to_string()) + })?; + + if !db.edge_exists(data.src, data.etype, data.dst) { + db.add_edge(data.src, data.etype, data.dst)?; + } + + for (key_id, value) in data.props { + if db.edge_prop(data.src, data.etype, data.dst, key_id) != Some(value.clone()) { + db.set_edge_prop(data.src, data.etype, data.dst, key_id, value)?; + } + } + Ok(()) + } + WalRecordType::AddEdgesPropsBatch => { + let batch = parse_add_edges_props_batch_payload(&record.payload).ok_or_else(|| { + KiteError::InvalidReplication("invalid AddEdgesPropsBatch replication payload".to_string()) + })?; + + for entry in batch { + if !db.edge_exists(entry.src, entry.etype, entry.dst) { + db.add_edge(entry.src, entry.etype, entry.dst)?; + } + + for (key_id, value) in entry.props { + if db.edge_prop(entry.src, 
entry.etype, entry.dst, key_id) != Some(value.clone()) { + db.set_edge_prop(entry.src, entry.etype, entry.dst, key_id, value)?; + } + } + } + + Ok(()) + } + WalRecordType::SetNodeProp => { + let data = parse_set_node_prop_payload(&record.payload).ok_or_else(|| { + KiteError::InvalidReplication("invalid SetNodeProp replication payload".to_string()) + })?; + + if db.node_prop(data.node_id, data.key_id) != Some(data.value.clone()) { + db.set_node_prop(data.node_id, data.key_id, data.value)?; + } + + Ok(()) + } + WalRecordType::DelNodeProp => { + let data = parse_del_node_prop_payload(&record.payload).ok_or_else(|| { + KiteError::InvalidReplication("invalid DelNodeProp replication payload".to_string()) + })?; + + if db.node_prop(data.node_id, data.key_id).is_some() { + db.delete_node_prop(data.node_id, data.key_id)?; + } + Ok(()) + } + WalRecordType::SetEdgeProp => { + let data = parse_set_edge_prop_payload(&record.payload).ok_or_else(|| { + KiteError::InvalidReplication("invalid SetEdgeProp replication payload".to_string()) + })?; + + if db.edge_prop(data.src, data.etype, data.dst, data.key_id) != Some(data.value.clone()) { + db.set_edge_prop(data.src, data.etype, data.dst, data.key_id, data.value)?; + } + Ok(()) + } + WalRecordType::SetEdgeProps => { + let data = parse_set_edge_props_payload(&record.payload).ok_or_else(|| { + KiteError::InvalidReplication("invalid SetEdgeProps replication payload".to_string()) + })?; + + for (key_id, value) in data.props { + if db.edge_prop(data.src, data.etype, data.dst, key_id) != Some(value.clone()) { + db.set_edge_prop(data.src, data.etype, data.dst, key_id, value)?; + } + } + Ok(()) + } + WalRecordType::DelEdgeProp => { + let data = parse_del_edge_prop_payload(&record.payload).ok_or_else(|| { + KiteError::InvalidReplication("invalid DelEdgeProp replication payload".to_string()) + })?; + + if db + .edge_prop(data.src, data.etype, data.dst, data.key_id) + .is_some() + { + db.delete_edge_prop(data.src, data.etype, data.dst, 
data.key_id)?; + } + Ok(()) + } + WalRecordType::AddNodeLabel => { + let data = parse_add_node_label_payload(&record.payload).ok_or_else(|| { + KiteError::InvalidReplication("invalid AddNodeLabel replication payload".to_string()) + })?; + + if !db.node_has_label(data.node_id, data.label_id) { + db.add_node_label(data.node_id, data.label_id)?; + } + Ok(()) + } + WalRecordType::RemoveNodeLabel => { + let data = parse_remove_node_label_payload(&record.payload).ok_or_else(|| { + KiteError::InvalidReplication("invalid RemoveNodeLabel replication payload".to_string()) + })?; + + if db.node_has_label(data.node_id, data.label_id) { + db.remove_node_label(data.node_id, data.label_id)?; + } + Ok(()) + } + WalRecordType::SetNodeVector => { + let data = parse_set_node_vector_payload(&record.payload).ok_or_else(|| { + KiteError::InvalidReplication("invalid SetNodeVector replication payload".to_string()) + })?; + + let current = db.node_vector(data.node_id, data.prop_key_id); + if current.as_deref().map(|v| v.as_ref()) != Some(data.vector.as_slice()) { + db.set_node_vector(data.node_id, data.prop_key_id, &data.vector)?; + } + Ok(()) + } + WalRecordType::DelNodeVector => { + let data = parse_del_node_vector_payload(&record.payload).ok_or_else(|| { + KiteError::InvalidReplication("invalid DelNodeVector replication payload".to_string()) + })?; + + if db.has_node_vector(data.node_id, data.prop_key_id) { + db.delete_node_vector(data.node_id, data.prop_key_id)?; + } + Ok(()) + } + WalRecordType::DefineLabel | WalRecordType::DefineEtype | WalRecordType::DefinePropkey => { + // IDs are embedded in mutation records; numeric IDs are sufficient for correctness + // during V1 replication apply. + Ok(()) + } + WalRecordType::BatchVectors | WalRecordType::SealFragment | WalRecordType::CompactFragments => { + // Vector batch and maintenance records are derived/index-management artifacts. 
+ // Replica correctness is defined by logical graph + property mutations, including + // SetNodeVector/DelNodeVector records, so these can be skipped safely. + Ok(()) + } + } +} + +#[cfg(test)] +mod tests { + use super::apply_wal_record_idempotent; + use crate::core::single_file::{close_single_file, open_single_file, SingleFileOpenOptions}; + use crate::core::wal::record::ParsedWalRecord; + use crate::types::WalRecordType; + + #[test] + fn replica_apply_ignores_vector_maintenance_records() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("replica-apply-vector-maintenance.kitedb"); + let db = open_single_file(&db_path, SingleFileOpenOptions::new()).expect("open db"); + + for record_type in [ + WalRecordType::BatchVectors, + WalRecordType::SealFragment, + WalRecordType::CompactFragments, + ] { + let record = ParsedWalRecord { + record_type, + flags: 0, + txid: 1, + payload: Vec::new(), + record_end: 0, + }; + apply_wal_record_idempotent(&db, &record) + .expect("derived vector maintenance should be ignored"); + } + + assert_eq!(db.count_nodes(), 0); + assert_eq!(db.count_edges(), 0); + close_single_file(db).expect("close db"); + } +} diff --git a/ray-rs/src/core/single_file/transaction.rs b/ray-rs/src/core/single_file/transaction.rs index 6e8b968..21f8b85 100644 --- a/ray-rs/src/core/single_file/transaction.rs +++ b/ray-rs/src/core/single_file/transaction.rs @@ -6,6 +6,8 @@ use crate::core::wal::record::{ build_begin_payload, build_commit_payload, build_rollback_payload, WalRecord, }; use crate::error::{KiteError, Result}; +use crate::replication::primary::PrimaryReplicationStatus; +use crate::replication::types::CommitToken; use crate::types::*; use parking_lot::Mutex; use std::marker::PhantomData; @@ -366,6 +368,11 @@ impl SingleFileDB { /// Commit the current transaction pub fn commit(&self) -> Result<()> { + self.commit_with_token().map(|_| ()) + } + + /// Commit the current transaction and return replication commit token if 
enabled. + pub fn commit_with_token(&self) -> Result> { let tx_handle = { let tid = std::thread::current().id(); let mut current_tx = self.current_tx.lock(); @@ -385,7 +392,7 @@ impl SingleFileDB { let mut tx_mgr = mvcc.tx_manager.lock(); tx_mgr.abort_tx(txid); } - return Ok(()); + return Ok(None); } let prev_writers = self.active_writers.fetch_sub(1, Ordering::SeqCst); debug_assert!(prev_writers > 0, "active_writers underflow in commit"); @@ -407,8 +414,11 @@ impl SingleFileDB { commit_ts_for_mvcc = Some((commit_ts, tx_mgr.active_count() > 0)); } - let group_commit_active = self.group_commit_enabled && self.sync_mode == SyncMode::Normal; + let replication_enabled = self.primary_replication.is_some(); + let group_commit_active = + self.group_commit_enabled && self.sync_mode == SyncMode::Normal && !replication_enabled; let mut group_commit_seq = 0u64; + let mut commit_token = None; { // Serialize commit to preserve WAL ordering without holding the delta lock during I/O. @@ -493,6 +503,10 @@ impl SingleFileDB { state.next_seq = state.next_seq.saturating_add(1); group_commit_seq = state.next_seq; } + + if let Some(replication) = self.primary_replication.as_ref() { + commit_token = Some(replication.append_commit_wal_frame(txid, pending_wal)?); + } } if group_commit_active { @@ -531,7 +545,7 @@ impl SingleFileDB { } } - Ok(()) + Ok(commit_token) } /// Rollback the current transaction @@ -587,6 +601,22 @@ impl SingleFileDB { self.current_tx_handle().as_ref().map(|tx| tx.lock().txid) } + /// Get the most recently emitted commit token from primary replication. + pub fn last_commit_token(&self) -> Option { + self + .primary_replication + .as_ref() + .and_then(|replication| replication.last_token()) + } + + /// Get primary replication status when replication role is `primary`. 
+ pub fn primary_replication_status(&self) -> Option { + self + .primary_replication + .as_ref() + .map(|replication| replication.status()) + } + /// Write a WAL record (internal helper) pub(crate) fn write_wal(&self, record: WalRecord) -> Result<()> { let mut pager = self.pager.lock(); @@ -601,13 +631,16 @@ impl SingleFileDB { record: WalRecord, ) -> Result<()> { let mut tx = tx_handle.lock(); + let record_bytes = record.build(); if tx.bulk_load { - let record_bytes = record.build(); tx.pending_wal.extend_from_slice(&record_bytes); Ok(()) } else { drop(tx); - self.write_wal(record) + self.write_wal(record)?; + let mut tx = tx_handle.lock(); + tx.pending_wal.extend_from_slice(&record_bytes); + Ok(()) } } diff --git a/ray-rs/src/core/single_file/vector.rs b/ray-rs/src/core/single_file/vector.rs index 22804c5..a43cb73 100644 --- a/ray-rs/src/core/single_file/vector.rs +++ b/ray-rs/src/core/single_file/vector.rs @@ -8,6 +8,8 @@ use crate::core::wal::record::{ }; use crate::error::{KiteError, Result}; use crate::types::*; +use crate::util::binary::{read_u32, read_u64}; +use crate::vector::ivf::serialize::deserialize_manifest; use crate::vector::store::{ create_vector_store, validate_vector, vector_store_delete, vector_store_has, vector_store_insert, vector_store_node_vector, @@ -18,7 +20,62 @@ use std::sync::Arc; use super::SingleFileDB; +#[derive(Debug, Clone)] +pub(crate) struct VectorStoreLazyEntry { + pub(crate) offset: usize, + pub(crate) len: usize, +} + impl SingleFileDB { + pub(crate) fn ensure_vector_store_loaded(&self, prop_key_id: PropKeyId) -> Result<()> { + if self.vector_stores.read().contains_key(&prop_key_id) { + return Ok(()); + } + + let entry = { + let lazy_entries = self.vector_store_lazy_entries.read(); + lazy_entries.get(&prop_key_id).cloned() + }; + let Some(entry) = entry else { + return Ok(()); + }; + + let manifest = { + let snapshot_guard = self.snapshot.read(); + let snapshot = snapshot_guard.as_ref().ok_or_else(|| { + 
KiteError::Internal("lazy vector-store entry present without loaded snapshot".to_string()) + })?; + deserialize_vector_store_entry(snapshot, prop_key_id, &entry)? + }; + + { + let mut stores = self.vector_stores.write(); + stores.entry(prop_key_id).or_insert(manifest); + } + self.vector_store_lazy_entries.write().remove(&prop_key_id); + Ok(()) + } + + pub(crate) fn materialize_all_vector_stores(&self) -> Result<()> { + let prop_keys: Vec = self + .vector_store_lazy_entries + .read() + .keys() + .copied() + .collect(); + for prop_key_id in prop_keys { + self.ensure_vector_store_loaded(prop_key_id)?; + } + Ok(()) + } + + pub(crate) fn vector_prop_keys(&self) -> std::collections::HashSet { + let mut keys: std::collections::HashSet = + self.vector_stores.read().keys().copied().collect(); + keys.extend(self.vector_store_lazy_entries.read().keys().copied()); + keys + } + /// Set a vector embedding for a node /// /// Each property key can have its own vector store with different dimensions. @@ -30,6 +87,7 @@ impl SingleFileDB { vector: &[f32], ) -> Result<()> { let (txid, tx_handle) = self.require_write_tx_handle()?; + self.ensure_vector_store_loaded(prop_key_id)?; // Check dimensions if store already exists { @@ -141,6 +199,10 @@ impl SingleFileDB { return pending.as_ref().map(Arc::clone); } + if self.ensure_vector_store_loaded(prop_key_id).is_err() { + return None; + } + // Fall back to committed storage let stores = self.vector_stores.read(); let store = stores.get(&prop_key_id)?; @@ -172,6 +234,10 @@ impl SingleFileDB { return pending.is_some(); } + if self.ensure_vector_store_loaded(prop_key_id).is_err() { + return false; + } + // Fall back to committed storage let stores = self.vector_stores.read(); if let Some(store) = stores.get(&prop_key_id) { @@ -185,6 +251,8 @@ impl SingleFileDB { /// /// Creates a new store with the given dimensions if it doesn't exist. 
pub fn vector_store_or_create(&self, prop_key_id: PropKeyId, dimensions: usize) -> Result<()> { + self.ensure_vector_store_loaded(prop_key_id)?; + let mut stores = self.vector_stores.write(); if stores.contains_key(&prop_key_id) { let store = stores.get(&prop_key_id).ok_or_else(|| { @@ -202,6 +270,7 @@ impl SingleFileDB { let config = VectorStoreConfig::new(dimensions); let manifest = create_vector_store(config); stores.insert(prop_key_id, manifest); + self.vector_store_lazy_entries.write().remove(&prop_key_id); Ok(()) } @@ -210,6 +279,14 @@ impl SingleFileDB { &self, pending_vectors: &HashMap<(NodeId, PropKeyId), Option>, ) -> Result<()> { + let mut prop_keys = std::collections::HashSet::new(); + for &(_node_id, prop_key_id) in pending_vectors.keys() { + prop_keys.insert(prop_key_id); + } + for prop_key_id in prop_keys { + self.ensure_vector_store_loaded(prop_key_id)?; + } + let mut stores = self.vector_stores.write(); for (&(node_id, prop_key_id), operation) in pending_vectors { @@ -220,6 +297,7 @@ impl SingleFileDB { let config = VectorStoreConfig::new(vector.len()); create_vector_store(config) }); + self.vector_store_lazy_entries.write().remove(&prop_key_id); // Insert (this handles replacement of existing vectors) vector_store_insert(store, node_id, vector.as_ref()).map_err(|e| { @@ -241,13 +319,37 @@ impl SingleFileDB { } } -pub(crate) fn vector_stores_from_snapshot( +pub(crate) fn vector_store_state_from_snapshot( snapshot: &SnapshotData, -) -> Result> { - let mut stores: HashMap = HashMap::new(); +) -> Result<( + HashMap, + HashMap, +)> { + if !snapshot + .header + .flags + .contains(SnapshotFlags::HAS_VECTOR_STORES) + && !snapshot.header.flags.contains(SnapshotFlags::HAS_VECTORS) + { + return Ok((HashMap::new(), HashMap::new())); + } + + if snapshot + .header + .flags + .contains(SnapshotFlags::HAS_VECTOR_STORES) + { + let lazy_entries = vector_store_lazy_entries_from_sections(snapshot)?; + return Ok((HashMap::new(), lazy_entries)); + } + + let mut stores 
= vector_stores_from_sections(snapshot)?; + if !stores.is_empty() { + return Ok((stores, HashMap::new())); + } if !snapshot.header.flags.contains(SnapshotFlags::HAS_VECTORS) { - return Ok(stores); + return Ok((stores, HashMap::new())); } let num_nodes = snapshot.header.num_nodes as usize; @@ -285,14 +387,175 @@ pub(crate) fn vector_stores_from_snapshot( } } + Ok((stores, HashMap::new())) +} + +pub(crate) fn vector_stores_from_snapshot( + snapshot: &SnapshotData, +) -> Result> { + let (stores, lazy_entries) = vector_store_state_from_snapshot(snapshot)?; + if lazy_entries.is_empty() { + return Ok(stores); + } + + let mut materialized = stores; + for (prop_key_id, entry) in lazy_entries { + let manifest = deserialize_vector_store_entry(snapshot, prop_key_id, &entry)?; + materialized.insert(prop_key_id, manifest); + } + Ok(materialized) +} + +pub(crate) fn materialize_vector_store_from_lazy_entries( + snapshot: &SnapshotData, + vector_stores: &mut HashMap, + lazy_entries: &mut HashMap, + prop_key_id: PropKeyId, +) -> Result<()> { + if vector_stores.contains_key(&prop_key_id) { + return Ok(()); + } + let Some(entry) = lazy_entries.remove(&prop_key_id) else { + return Ok(()); + }; + let manifest = deserialize_vector_store_entry(snapshot, prop_key_id, &entry)?; + vector_stores.insert(prop_key_id, manifest); + Ok(()) +} + +fn vector_stores_from_sections( + snapshot: &SnapshotData, +) -> Result> { + let lazy_entries = vector_store_lazy_entries_from_sections(snapshot)?; + if lazy_entries.is_empty() { + return Ok(HashMap::new()); + } + + let mut stores: HashMap = HashMap::new(); + for (prop_key_id, entry) in lazy_entries { + let manifest = deserialize_vector_store_entry(snapshot, prop_key_id, &entry)?; + stores.insert(prop_key_id, manifest); + } Ok(stores) } +fn vector_store_lazy_entries_from_sections( + snapshot: &SnapshotData, +) -> Result> { + let mut entries: HashMap = HashMap::new(); + let Some(index_bytes) = snapshot.section_data_shared(SectionId::VectorStoreIndex) else 
{ + return Ok(entries); + }; + let Some(blob_bytes) = snapshot.section_data_shared(SectionId::VectorStoreData) else { + return Err(KiteError::InvalidSnapshot( + "Vector store index present but vector store blob section is missing".to_string(), + )); + }; + + let index_bytes = index_bytes.as_ref(); + let blob_len = blob_bytes.as_ref().len(); + + if index_bytes.len() < 4 { + return Err(KiteError::InvalidSnapshot( + "Vector store index section too small".to_string(), + )); + } + + let count = read_u32(index_bytes, 0) as usize; + let expected_len = 4usize + .checked_add(count.saturating_mul(20)) + .ok_or_else(|| KiteError::InvalidSnapshot("Vector store index size overflow".to_string()))?; + if index_bytes.len() < expected_len { + return Err(KiteError::InvalidSnapshot(format!( + "Vector store index truncated: expected at least {expected_len} bytes, found {}", + index_bytes.len() + ))); + } + + for i in 0..count { + let entry_offset = 4 + i * 20; + let prop_key_id = read_u32(index_bytes, entry_offset); + let payload_offset = read_u64(index_bytes, entry_offset + 4) as usize; + let payload_len = read_u64(index_bytes, entry_offset + 12) as usize; + let payload_end = payload_offset.checked_add(payload_len).ok_or_else(|| { + KiteError::InvalidSnapshot(format!( + "Vector store entry {i} overflow: offset={payload_offset}, len={payload_len}" + )) + })?; + if payload_end > blob_len { + return Err(KiteError::InvalidSnapshot(format!( + "Vector store entry {i} out of bounds: {}..{} exceeds blob size {}", + payload_offset, payload_end, blob_len + ))); + } + + let entry = VectorStoreLazyEntry { + offset: payload_offset, + len: payload_len, + }; + if entries.insert(prop_key_id, entry).is_some() { + return Err(KiteError::InvalidSnapshot(format!( + "Duplicate vector store entry for prop key {prop_key_id}" + ))); + } + } + + Ok(entries) +} + +fn deserialize_vector_store_entry( + snapshot: &SnapshotData, + prop_key_id: PropKeyId, + entry: &VectorStoreLazyEntry, +) -> Result { + let 
blob_bytes = snapshot + .section_data_shared(SectionId::VectorStoreData) + .ok_or_else(|| { + KiteError::InvalidSnapshot( + "Vector store entry present but vector store blob section is missing".to_string(), + ) + })?; + let blob_bytes = blob_bytes.as_ref(); + + let payload_end = entry.offset.checked_add(entry.len).ok_or_else(|| { + KiteError::InvalidSnapshot(format!( + "Vector store entry overflow for prop key {prop_key_id}: offset={}, len={}", + entry.offset, entry.len + )) + })?; + if payload_end > blob_bytes.len() { + return Err(KiteError::InvalidSnapshot(format!( + "Vector store entry for prop key {prop_key_id} out of bounds: {}..{} exceeds blob size {}", + entry.offset, + payload_end, + blob_bytes.len() + ))); + } + + deserialize_manifest(&blob_bytes[entry.offset..payload_end]).map_err(|err| { + KiteError::InvalidSnapshot(format!( + "Failed to deserialize vector store for prop key {prop_key_id}: {err}" + )) + }) +} + #[cfg(test)] mod tests { + use super::{vector_store_state_from_snapshot, vector_stores_from_snapshot}; use crate::core::single_file::{close_single_file, open_single_file, SingleFileOpenOptions}; + use crate::core::snapshot::reader::SnapshotData; + use crate::core::snapshot::writer::{build_snapshot_to_memory, NodeData, SnapshotBuildInput}; + use crate::types::{ + PropValue, SectionId, SnapshotFlags, SECTION_ENTRY_SIZE, SNAPSHOT_HEADER_SIZE, + }; + use crate::util::binary::{read_u64, write_u32, write_u64}; + use crate::util::crc::crc32c; use crate::vector::distance::normalize; - use tempfile::tempdir; + use crate::vector::store::{create_vector_store, vector_store_has, vector_store_insert}; + use crate::vector::types::VectorStoreConfig; + use std::collections::HashMap; + use std::io::Write; + use tempfile::{tempdir, NamedTempFile}; #[test] fn test_set_node_vector_rejects_invalid_vectors() { @@ -350,6 +613,36 @@ mod tests { close_single_file(db).expect("expected value"); } + #[test] + fn test_open_keeps_vector_store_lazy_until_first_access() { + let 
temp_dir = tempdir().expect("expected value"); + let db_path = temp_dir.path().join("vectors-lazy-open.kitedb"); + + let db = open_single_file(&db_path, SingleFileOpenOptions::new()).expect("expected value"); + db.begin(false).expect("expected value"); + let node_id = db.create_node(None).expect("expected value"); + let prop_key_id = db.define_propkey("embedding").expect("expected value"); + db.set_node_vector(node_id, prop_key_id, &[0.1, 0.2, 0.3]) + .expect("expected value"); + db.commit().expect("expected value"); + db.checkpoint().expect("expected value"); + close_single_file(db).expect("expected value"); + + let db = open_single_file(&db_path, SingleFileOpenOptions::new()).expect("expected value"); + assert!(db.vector_stores.read().is_empty()); + + let vec = db + .node_vector(node_id, prop_key_id) + .expect("expected value"); + let expected = normalize(&[0.1, 0.2, 0.3]); + assert_eq!(vec.len(), expected.len()); + for (got, exp) in vec.iter().zip(expected.iter()) { + assert!((got - exp).abs() < 1e-6); + } + assert!(db.vector_stores.read().contains_key(&prop_key_id)); + close_single_file(db).expect("expected value"); + } + #[test] fn test_vector_persistence_across_wal_replay() { let temp_dir = tempdir().expect("expected value"); @@ -377,4 +670,145 @@ mod tests { } close_single_file(db).expect("expected value"); } + + #[test] + fn test_vector_store_sections_round_trip() { + let mut manifest = create_vector_store(VectorStoreConfig::new(3)); + vector_store_insert(&mut manifest, 42, &[0.1, 0.2, 0.3]).expect("expected value"); + + let mut stores = HashMap::new(); + stores.insert(7, manifest); + + let mut propkeys = HashMap::new(); + propkeys.insert(7, "embedding".to_string()); + + let buffer = build_snapshot_to_memory(SnapshotBuildInput { + generation: 1, + nodes: vec![NodeData { + node_id: 42, + key: None, + labels: vec![], + props: HashMap::new(), + }], + edges: Vec::new(), + labels: HashMap::new(), + etypes: HashMap::new(), + propkeys, + vector_stores: 
Some(stores), + compression: None, + }) + .expect("expected value"); + + let mut tmp = NamedTempFile::new().expect("expected value"); + tmp.write_all(&buffer).expect("expected value"); + tmp.flush().expect("expected value"); + + let snapshot = SnapshotData::load(tmp.path()).expect("expected value"); + assert!(snapshot + .header + .flags + .contains(SnapshotFlags::HAS_VECTOR_STORES)); + assert!(!snapshot.header.flags.contains(SnapshotFlags::HAS_VECTORS)); + + let loaded = vector_stores_from_snapshot(&snapshot).expect("expected value"); + let loaded_manifest = loaded.get(&7).expect("expected value"); + assert!(vector_store_has(loaded_manifest, 42)); + + // Verify the legacy property path remains empty when vectors are only + // materialized via persisted vector-store sections. + let phys = snapshot.phys_node(42).expect("expected value"); + assert!(!matches!( + snapshot.node_prop(phys, 7), + Some(PropValue::VectorF32(_)) + )); + } + + #[test] + fn test_checkpoint_does_not_duplicate_vectors_into_node_props() { + let temp_dir = tempdir().expect("expected value"); + let db_path = temp_dir.path().join("vectors-no-dup-node-prop.kitedb"); + + let db = open_single_file(&db_path, SingleFileOpenOptions::new()).expect("expected value"); + db.begin(false).expect("expected value"); + let node_id = db.create_node(None).expect("expected value"); + let prop_key_id = db.define_propkey("embedding").expect("expected value"); + db.set_node_vector(node_id, prop_key_id, &[0.1, 0.2, 0.3]) + .expect("expected value"); + db.commit().expect("expected value"); + db.checkpoint().expect("expected value"); + close_single_file(db).expect("expected value"); + + let db = open_single_file(&db_path, SingleFileOpenOptions::new()).expect("expected value"); + let snapshot_guard = db.snapshot.read(); + let snapshot = snapshot_guard.as_ref().expect("expected value"); + let phys = snapshot.phys_node(node_id).expect("expected value"); + assert!(!matches!( + snapshot.node_prop(phys, prop_key_id), + 
Some(PropValue::VectorF32(_)) + )); + drop(snapshot_guard); + close_single_file(db).expect("expected value"); + } + + #[test] + fn test_no_vector_flags_ignore_vector_sections() { + let mut buffer = build_snapshot_to_memory(SnapshotBuildInput { + generation: 1, + nodes: vec![NodeData { + node_id: 1, + key: None, + labels: vec![], + props: HashMap::new(), + }], + edges: Vec::new(), + labels: HashMap::new(), + etypes: HashMap::new(), + propkeys: HashMap::new(), + vector_stores: None, + compression: None, + }) + .expect("expected value"); + + // Corrupt-insert a VectorStoreIndex section table entry while keeping + // HAS_VECTOR_STORES/HAS_VECTORS flags unset. Loader should ignore it. + let entry_offset = + SNAPSHOT_HEADER_SIZE + (SectionId::VectorStoreIndex as usize) * SECTION_ENTRY_SIZE; + let mut prev_end = 0u64; + for section_idx in 0..(SectionId::VectorStoreIndex as usize) { + let sec_entry = SNAPSHOT_HEADER_SIZE + section_idx * SECTION_ENTRY_SIZE; + let offset = read_u64(&buffer, sec_entry); + let len = read_u64(&buffer, sec_entry + 8); + if len > 0 { + prev_end = prev_end.max(offset + len); + } + } + let fake_payload_offset = ((prev_end + 63) / 64) * 64; + let required_size = (((fake_payload_offset + 1 + 63) / 64) * 64 + 4) as usize; + if buffer.len() < required_size { + buffer.resize(required_size, 0); + } + write_u64(&mut buffer, entry_offset, fake_payload_offset); + write_u64(&mut buffer, entry_offset + 8, 1); + write_u32(&mut buffer, entry_offset + 16, 0); + write_u32(&mut buffer, entry_offset + 20, 1); + let crc_offset = buffer.len() - 4; + let crc = crc32c(&buffer[..crc_offset]); + write_u32(&mut buffer, crc_offset, crc); + + let mut tmp = NamedTempFile::new().expect("expected value"); + tmp.write_all(&buffer).expect("expected value"); + tmp.flush().expect("expected value"); + + let snapshot = SnapshotData::load(tmp.path()).expect("expected value"); + assert!(!snapshot + .header + .flags + .contains(SnapshotFlags::HAS_VECTOR_STORES)); + 
assert!(!snapshot.header.flags.contains(SnapshotFlags::HAS_VECTORS)); + + let (stores, lazy_entries) = + vector_store_state_from_snapshot(&snapshot).expect("expected value"); + assert!(stores.is_empty()); + assert!(lazy_entries.is_empty()); + } } diff --git a/ray-rs/src/core/snapshot/sections.rs b/ray-rs/src/core/snapshot/sections.rs index dff9b62..9043f70 100644 --- a/ray-rs/src/core/snapshot/sections.rs +++ b/ray-rs/src/core/snapshot/sections.rs @@ -17,8 +17,10 @@ pub struct ParsedSections { /// Resolve section table size for a snapshot version. pub fn section_count_for_version(version: u32) -> usize { - if version >= 3 { + if version >= 4 { SectionId::COUNT + } else if version >= 3 { + SectionId::COUNT_V3 } else if version >= 2 { SectionId::COUNT_V2 } else { @@ -169,6 +171,7 @@ mod tests { labels: HashMap::new(), etypes: HashMap::new(), propkeys: HashMap::new(), + vector_stores: None, compression: None, }) .expect("snapshot build") diff --git a/ray-rs/src/core/snapshot/writer.rs b/ray-rs/src/core/snapshot/writer.rs index 3dff132..010c415 100644 --- a/ray-rs/src/core/snapshot/writer.rs +++ b/ray-rs/src/core/snapshot/writer.rs @@ -10,6 +10,8 @@ use crate::util::binary::*; use crate::util::compression::{maybe_compress, CompressionOptions, CompressionType}; use crate::util::crc::crc32c; use crate::util::hash::xxhash64_string; +use crate::vector::ivf::serialize::serialize_manifest; +use crate::vector::types::VectorManifest; use std::collections::HashMap; // ============================================================================ @@ -43,6 +45,7 @@ pub struct SnapshotBuildInput { pub labels: HashMap, pub etypes: HashMap, pub propkeys: HashMap, + pub vector_stores: Option>, pub compression: Option, } @@ -835,6 +838,42 @@ fn add_vector_sections( true } +fn add_vector_store_sections( + add_section: &mut impl FnMut(SectionId, Vec), + vector_stores: Option<&HashMap>, +) -> bool { + let Some(vector_stores) = vector_stores else { + return false; + }; + if 
vector_stores.is_empty() { + return false; + } + + let mut ordered: Vec<(PropKeyId, &VectorManifest)> = + vector_stores.iter().map(|(&k, v)| (k, v)).collect(); + ordered.sort_by_key(|(prop_key_id, _)| *prop_key_id); + + let mut index_data = vec![0u8; 4 + ordered.len() * 20]; + write_u32(&mut index_data, 0, ordered.len() as u32); + let mut blob_data = Vec::new(); + + for (i, (prop_key_id, manifest)) in ordered.iter().enumerate() { + let encoded = serialize_manifest(manifest); + let offset = blob_data.len() as u64; + let length = encoded.len() as u64; + blob_data.extend_from_slice(&encoded); + + let entry_offset = 4 + i * 20; + write_u32(&mut index_data, entry_offset, *prop_key_id); + write_u64(&mut index_data, entry_offset + 4, offset); + write_u64(&mut index_data, entry_offset + 12, length); + } + + add_section(SectionId::VectorStoreIndex, index_data); + add_section(SectionId::VectorStoreData, blob_data); + true +} + // ============================================================================ // Main snapshot building // ============================================================================ @@ -851,6 +890,7 @@ pub fn build_snapshot_to_memory(input: SnapshotBuildInput) -> Result> { labels, etypes, propkeys, + vector_stores, compression, } = input; @@ -866,10 +906,16 @@ pub fn build_snapshot_to_memory(input: SnapshotBuildInput) -> Result> { let num_strings = state.string_table.len(); let mut add_section = |id: SectionId, data: Vec| { - let (compressed, compression_type) = maybe_compress(&data, &compression_opts); + let uncompressed_size = data.len() as u32; + let (compressed, compression_type) = + if matches!(id, SectionId::VectorStoreIndex | SectionId::VectorStoreData) { + (data, CompressionType::None) + } else { + maybe_compress(&data, &compression_opts) + }; section_data.push(SectionData { id, - uncompressed_size: data.len() as u32, + uncompressed_size, data: compressed, compression: compression_type, }); @@ -922,6 +968,7 @@ pub fn 
build_snapshot_to_memory(input: SnapshotBuildInput) -> Result> { ); let has_vectors = add_vector_sections(&mut add_section, vector_table); + let has_vector_stores = add_vector_store_sections(&mut add_section, vector_stores.as_ref()); // Calculate total size and offsets let header_size = SNAPSHOT_HEADER_SIZE; @@ -965,6 +1012,9 @@ pub fn build_snapshot_to_memory(input: SnapshotBuildInput) -> Result> { if has_vectors { flags |= SnapshotFlags::HAS_VECTORS; } + if has_vector_stores { + flags |= SnapshotFlags::HAS_VECTOR_STORES; + } write_u32(&mut buffer, offset, flags.bits()); offset += 4; @@ -1034,7 +1084,11 @@ pub fn build_snapshot_to_memory(input: SnapshotBuildInput) -> Result> { #[cfg(test)] mod tests { use super::*; + use crate::core::snapshot::reader::SnapshotData; + use crate::util::compression::{CompressionOptions, CompressionType}; use crate::util::crc::crc32c; + use crate::vector::store::{create_vector_store, vector_store_insert}; + use crate::vector::types::VectorStoreConfig; use std::io::Write; use tempfile::NamedTempFile; @@ -1117,6 +1171,7 @@ mod tests { labels, etypes, propkeys, + vector_stores: None, compression: None, } } @@ -1183,6 +1238,54 @@ mod tests { } } + #[test] + fn test_vector_store_sections_forced_uncompressed() { + let mut manifest = create_vector_store(VectorStoreConfig::new(64)); + for node_id in 1..=1024u64 { + let mut vector = vec![0.0f32; 64]; + vector[(node_id as usize) % 64] = 1.0; + vector_store_insert(&mut manifest, node_id, &vector).expect("expected value"); + } + + let mut stores = HashMap::new(); + stores.insert(7, manifest); + + let mut propkeys = HashMap::new(); + propkeys.insert(7, "embedding".to_string()); + + let buffer = build_snapshot_to_memory(SnapshotBuildInput { + generation: 1, + nodes: vec![NodeData { + node_id: 1, + key: None, + labels: vec![], + props: HashMap::new(), + }], + edges: Vec::new(), + labels: HashMap::new(), + etypes: HashMap::new(), + propkeys, + vector_stores: Some(stores), + compression: 
Some(CompressionOptions { + enabled: true, + compression_type: CompressionType::Zstd, + min_size: 1, + level: 3, + }), + }) + .expect("expected value"); + + let mut tmp = NamedTempFile::new().expect("expected value"); + tmp.write_all(&buffer).expect("expected value"); + tmp.flush().expect("expected value"); + + let snapshot = SnapshotData::load(tmp.path()).expect("expected value"); + assert!(snapshot + .section_slice(SectionId::VectorStoreIndex) + .is_some()); + assert!(snapshot.section_slice(SectionId::VectorStoreData).is_some()); + } + #[test] fn test_build_empty_snapshot() { let input = SnapshotBuildInput { @@ -1192,6 +1295,7 @@ mod tests { labels: HashMap::new(), etypes: HashMap::new(), propkeys: HashMap::new(), + vector_stores: None, compression: None, }; @@ -1224,6 +1328,7 @@ mod tests { labels: HashMap::new(), etypes, propkeys: HashMap::new(), + vector_stores: None, compression: None, }; diff --git a/ray-rs/src/error.rs b/ray-rs/src/error.rs index 781eee9..e2e030b 100644 --- a/ray-rs/src/error.rs +++ b/ray-rs/src/error.rs @@ -116,6 +116,10 @@ pub enum KiteError { /// Invalid query or builder usage #[error("Invalid query: {0}")] InvalidQuery(Cow<'static, str>), + + /// Replication metadata/record validation failure + #[error("Invalid replication state: {0}")] + InvalidReplication(String), } /// Result type alias for KiteDB operations diff --git a/ray-rs/src/lib.rs b/ray-rs/src/lib.rs index 7c51bf7..6c7effe 100644 --- a/ray-rs/src/lib.rs +++ b/ray-rs/src/lib.rs @@ -52,6 +52,9 @@ pub mod export; // Streaming/pagination pub mod streaming; +// Replication modules +pub mod replication; + // High-level API modules (Phase 6) pub mod api; diff --git a/ray-rs/src/metrics/mod.rs b/ray-rs/src/metrics/mod.rs index 25a0adf..f994f20 100644 --- a/ray-rs/src/metrics/mod.rs +++ b/ray-rs/src/metrics/mod.rs @@ -2,10 +2,47 @@ //! //! Core implementation used by bindings. 
-use std::time::SystemTime; +use std::collections::HashMap; +use std::fs::{self, File}; +use std::io::{BufReader, Write}; +use std::sync::Arc; +use std::sync::OnceLock; +use std::thread; +use std::time::{Duration, SystemTime}; + +use flate2::write::GzEncoder; +use flate2::Compression; +use opentelemetry_proto::tonic::collector::metrics::v1::metrics_service_client::MetricsServiceClient as OtelMetricsServiceClient; +use opentelemetry_proto::tonic::collector::metrics::v1::ExportMetricsServiceRequest as OtelExportMetricsServiceRequest; +use opentelemetry_proto::tonic::common::v1::{ + any_value as otel_any_value, AnyValue as OtelAnyValue, + InstrumentationScope as OtelInstrumentationScope, KeyValue as OtelKeyValue, +}; +use opentelemetry_proto::tonic::metrics::v1::{ + metric as otel_metric, number_data_point as otel_number_data_point, + AggregationTemporality as OtelAggregationTemporality, Gauge as OtelGauge, Metric as OtelMetric, + NumberDataPoint as OtelNumberDataPoint, ResourceMetrics as OtelResourceMetrics, + ScopeMetrics as OtelScopeMetrics, Sum as OtelSum, +}; +use opentelemetry_proto::tonic::resource::v1::Resource as OtelResource; +use parking_lot::Mutex; +use prost::Message; +use rand::Rng; +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value}; +use tonic::codec::CompressionEncoding as TonicCompressionEncoding; +use tonic::metadata::MetadataValue; +use tonic::transport::{ + Certificate as TonicCertificate, ClientTlsConfig, Endpoint as TonicEndpoint, + Identity as TonicIdentity, +}; +use tonic::Code as TonicCode; use crate::cache::manager::CacheManagerStats; use crate::core::single_file::SingleFileDB; +use crate::error::{KiteError, Result}; +use crate::replication::primary::PrimaryReplicationStatus; +use crate::replication::replica::ReplicaReplicationStatus; use crate::types::DeltaState; /// Cache layer metrics @@ -56,6 +93,41 @@ pub struct MvccMetrics { pub committed_writes_pruned: i64, } +/// Primary replication metrics +#[derive(Debug, Clone)] 
+pub struct PrimaryReplicationMetrics { + pub epoch: i64, + pub head_log_index: i64, + pub retained_floor: i64, + pub replica_count: i64, + pub stale_epoch_replica_count: i64, + pub max_replica_lag: i64, + pub min_replica_applied_log_index: Option, + pub sidecar_path: String, + pub last_token: Option, + pub append_attempts: i64, + pub append_failures: i64, + pub append_successes: i64, +} + +/// Replica replication metrics +#[derive(Debug, Clone)] +pub struct ReplicaReplicationMetrics { + pub applied_epoch: i64, + pub applied_log_index: i64, + pub needs_reseed: bool, + pub last_error: Option, +} + +/// Replication metrics +#[derive(Debug, Clone)] +pub struct ReplicationMetrics { + pub enabled: bool, + pub role: String, + pub primary: Option, + pub replica: Option, +} + /// Memory metrics #[derive(Debug, Clone)] pub struct MemoryMetrics { @@ -74,6 +146,7 @@ pub struct DatabaseMetrics { pub data: DataMetrics, pub cache: CacheMetrics, pub mvcc: Option, + pub replication: ReplicationMetrics, pub memory: MemoryMetrics, pub collected_at_ms: i64, } @@ -93,6 +166,91 @@ pub struct HealthCheckResult { pub checks: Vec, } +/// OTLP HTTP push result for replication metrics export. +#[derive(Debug, Clone)] +pub struct OtlpHttpExportResult { + pub status_code: i64, + pub response_body: String, +} + +/// TLS/mTLS options for OTLP HTTP push. +#[derive(Debug, Clone, Default)] +pub struct OtlpHttpTlsOptions { + pub https_only: bool, + pub ca_cert_pem_path: Option, + pub client_cert_pem_path: Option, + pub client_key_pem_path: Option, +} + +/// OTLP HTTP push options for collector export. 
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)] +pub enum OtlpAdaptiveRetryMode { + #[default] + Linear, + Ewma, +} + +#[derive(Debug, Clone)] +pub struct OtlpHttpPushOptions { + pub timeout_ms: u64, + pub bearer_token: Option, + pub retry_max_attempts: u32, + pub retry_backoff_ms: u64, + pub retry_backoff_max_ms: u64, + pub retry_jitter_ratio: f64, + pub adaptive_retry_mode: OtlpAdaptiveRetryMode, + pub adaptive_retry_ewma_alpha: f64, + pub adaptive_retry: bool, + pub circuit_breaker_failure_threshold: u32, + pub circuit_breaker_open_ms: u64, + pub circuit_breaker_half_open_probes: u32, + pub circuit_breaker_state_path: Option, + pub circuit_breaker_state_url: Option, + pub circuit_breaker_state_patch: bool, + pub circuit_breaker_state_patch_batch: bool, + pub circuit_breaker_state_patch_batch_max_keys: u32, + pub circuit_breaker_state_patch_merge: bool, + pub circuit_breaker_state_patch_merge_max_keys: u32, + pub circuit_breaker_state_patch_retry_max_attempts: u32, + pub circuit_breaker_state_cas: bool, + pub circuit_breaker_state_lease_id: Option, + pub circuit_breaker_scope_key: Option, + pub compression_gzip: bool, + pub tls: OtlpHttpTlsOptions, +} + +impl Default for OtlpHttpPushOptions { + fn default() -> Self { + Self { + timeout_ms: 5_000, + bearer_token: None, + retry_max_attempts: 1, + retry_backoff_ms: 100, + retry_backoff_max_ms: 2_000, + retry_jitter_ratio: 0.0, + adaptive_retry_mode: OtlpAdaptiveRetryMode::Linear, + adaptive_retry_ewma_alpha: 0.3, + adaptive_retry: false, + circuit_breaker_failure_threshold: 0, + circuit_breaker_open_ms: 0, + circuit_breaker_half_open_probes: 1, + circuit_breaker_state_path: None, + circuit_breaker_state_url: None, + circuit_breaker_state_patch: false, + circuit_breaker_state_patch_batch: false, + circuit_breaker_state_patch_batch_max_keys: 8, + circuit_breaker_state_patch_merge: false, + circuit_breaker_state_patch_merge_max_keys: 32, + circuit_breaker_state_patch_retry_max_attempts: 
1, + circuit_breaker_state_cas: false, + circuit_breaker_state_lease_id: None, + circuit_breaker_scope_key: None, + compression_gzip: false, + tls: OtlpHttpTlsOptions::default(), + } + } +} + pub fn collect_metrics_single_file(db: &SingleFileDB) -> DatabaseMetrics { let stats = db.stats(); let delta = db.delta.read(); @@ -118,6 +276,10 @@ pub fn collect_metrics_single_file(db: &SingleFileDB) -> DatabaseMetrics { }; let cache = build_cache_metrics(cache_stats.as_ref()); + let replication = build_replication_metrics( + db.primary_replication_status(), + db.replica_replication_status(), + ); let delta_bytes = estimate_delta_memory(&delta); let cache_bytes = estimate_cache_memory(cache_stats.as_ref()); let snapshot_bytes = (stats.snapshot_nodes as i64 * 50) + (stats.snapshot_edges as i64 * 20); @@ -145,6 +307,7 @@ pub fn collect_metrics_single_file(db: &SingleFileDB) -> DatabaseMetrics { data, cache, mvcc, + replication, memory: MemoryMetrics { delta_estimate_bytes: delta_bytes, cache_estimate_bytes: cache_bytes, @@ -155,6 +318,1983 @@ pub fn collect_metrics_single_file(db: &SingleFileDB) -> DatabaseMetrics { } } +/// Collect replication-only metrics and render them in Prometheus text format. +pub fn collect_replication_metrics_prometheus_single_file(db: &SingleFileDB) -> String { + let metrics = collect_metrics_single_file(db); + render_replication_metrics_prometheus(&metrics) +} + +/// Collect replication-only metrics and render them as OTLP JSON payload. +pub fn collect_replication_metrics_otel_json_single_file(db: &SingleFileDB) -> String { + let metrics = collect_metrics_single_file(db); + render_replication_metrics_otel_json(&metrics) +} + +/// Collect replication-only metrics and render them as OTLP protobuf payload. 
+pub fn collect_replication_metrics_otel_protobuf_single_file(db: &SingleFileDB) -> Vec { + let metrics = collect_metrics_single_file(db); + render_replication_metrics_otel_protobuf(&metrics) +} + +/// Push replication OTLP-JSON payload to an OTLP collector endpoint. +/// +/// Expects collector HTTP endpoint (for example `/v1/metrics`). +/// Returns an error when collector responds with non-2xx status. +pub fn push_replication_metrics_otel_json_single_file( + db: &SingleFileDB, + endpoint: &str, + timeout_ms: u64, + bearer_token: Option<&str>, +) -> Result { + let options = OtlpHttpPushOptions { + timeout_ms, + bearer_token: bearer_token.map(ToOwned::to_owned), + ..OtlpHttpPushOptions::default() + }; + push_replication_metrics_otel_json_single_file_with_options(db, endpoint, &options) +} + +/// Push replication OTLP-JSON payload using explicit push options. +pub fn push_replication_metrics_otel_json_single_file_with_options( + db: &SingleFileDB, + endpoint: &str, + options: &OtlpHttpPushOptions, +) -> Result { + let payload = collect_replication_metrics_otel_json_single_file(db); + push_replication_metrics_otel_json_payload_with_options(&payload, endpoint, options) +} + +/// Push pre-rendered replication OTLP-JSON payload to an OTLP collector endpoint. +pub fn push_replication_metrics_otel_json_payload( + payload: &str, + endpoint: &str, + timeout_ms: u64, + bearer_token: Option<&str>, +) -> Result { + let options = OtlpHttpPushOptions { + timeout_ms, + bearer_token: bearer_token.map(ToOwned::to_owned), + ..OtlpHttpPushOptions::default() + }; + push_replication_metrics_otel_json_payload_with_options(payload, endpoint, &options) +} + +/// Push pre-rendered replication OTLP-JSON payload using explicit push options. 
+pub fn push_replication_metrics_otel_json_payload_with_options( + payload: &str, + endpoint: &str, + options: &OtlpHttpPushOptions, +) -> Result { + push_replication_metrics_otel_http_payload_with_options( + payload.as_bytes(), + endpoint, + options, + "application/json", + ) +} + +/// Push replication OTLP-protobuf payload to an OTLP collector endpoint. +pub fn push_replication_metrics_otel_protobuf_single_file( + db: &SingleFileDB, + endpoint: &str, + timeout_ms: u64, + bearer_token: Option<&str>, +) -> Result { + let options = OtlpHttpPushOptions { + timeout_ms, + bearer_token: bearer_token.map(ToOwned::to_owned), + ..OtlpHttpPushOptions::default() + }; + push_replication_metrics_otel_protobuf_single_file_with_options(db, endpoint, &options) +} + +/// Push replication OTLP-protobuf payload using explicit push options. +pub fn push_replication_metrics_otel_protobuf_single_file_with_options( + db: &SingleFileDB, + endpoint: &str, + options: &OtlpHttpPushOptions, +) -> Result { + let payload = collect_replication_metrics_otel_protobuf_single_file(db); + push_replication_metrics_otel_protobuf_payload_with_options(&payload, endpoint, options) +} + +/// Push pre-rendered replication OTLP-protobuf payload to an OTLP collector endpoint. +pub fn push_replication_metrics_otel_protobuf_payload( + payload: &[u8], + endpoint: &str, + timeout_ms: u64, + bearer_token: Option<&str>, +) -> Result { + let options = OtlpHttpPushOptions { + timeout_ms, + bearer_token: bearer_token.map(ToOwned::to_owned), + ..OtlpHttpPushOptions::default() + }; + push_replication_metrics_otel_protobuf_payload_with_options(payload, endpoint, &options) +} + +/// Push pre-rendered replication OTLP-protobuf payload using explicit push options. 
+pub fn push_replication_metrics_otel_protobuf_payload_with_options( + payload: &[u8], + endpoint: &str, + options: &OtlpHttpPushOptions, +) -> Result { + push_replication_metrics_otel_http_payload_with_options( + payload, + endpoint, + options, + "application/x-protobuf", + ) +} + +/// Push replication OTLP-protobuf payload to an OTLP collector gRPC endpoint. +pub fn push_replication_metrics_otel_grpc_single_file( + db: &SingleFileDB, + endpoint: &str, + timeout_ms: u64, + bearer_token: Option<&str>, +) -> Result { + let options = OtlpHttpPushOptions { + timeout_ms, + bearer_token: bearer_token.map(ToOwned::to_owned), + ..OtlpHttpPushOptions::default() + }; + push_replication_metrics_otel_grpc_single_file_with_options(db, endpoint, &options) +} + +/// Push replication OTLP-protobuf payload over gRPC using explicit push options. +pub fn push_replication_metrics_otel_grpc_single_file_with_options( + db: &SingleFileDB, + endpoint: &str, + options: &OtlpHttpPushOptions, +) -> Result { + let payload = collect_replication_metrics_otel_protobuf_single_file(db); + push_replication_metrics_otel_grpc_payload_with_options(&payload, endpoint, options) +} + +/// Push pre-rendered replication OTLP-protobuf payload to an OTLP collector gRPC endpoint. +pub fn push_replication_metrics_otel_grpc_payload( + payload: &[u8], + endpoint: &str, + timeout_ms: u64, + bearer_token: Option<&str>, +) -> Result { + let options = OtlpHttpPushOptions { + timeout_ms, + bearer_token: bearer_token.map(ToOwned::to_owned), + ..OtlpHttpPushOptions::default() + }; + push_replication_metrics_otel_grpc_payload_with_options(payload, endpoint, &options) +} + +/// Push pre-rendered replication OTLP-protobuf payload over gRPC using explicit push options. 
+pub fn push_replication_metrics_otel_grpc_payload_with_options( + payload: &[u8], + endpoint: &str, + options: &OtlpHttpPushOptions, +) -> Result { + let endpoint = endpoint.trim(); + if endpoint.is_empty() { + return Err(KiteError::InvalidQuery( + "OTLP endpoint must not be empty".into(), + )); + } + validate_otel_push_options(options)?; + if options.tls.https_only && !endpoint_uses_https(endpoint) { + return Err(KiteError::InvalidQuery( + "OTLP endpoint must use https when https_only is enabled".into(), + )); + } + check_circuit_breaker_open(endpoint, options)?; + + let request = OtelExportMetricsServiceRequest::decode(payload).map_err(|error| { + KiteError::InvalidQuery(format!("Invalid OTLP protobuf payload: {error}").into()) + })?; + push_replication_metrics_otel_grpc_request_with_options(request, endpoint, options) +} + +fn push_replication_metrics_otel_grpc_request_with_options( + request_payload: OtelExportMetricsServiceRequest, + endpoint: &str, + options: &OtlpHttpPushOptions, +) -> Result { + let timeout = Duration::from_millis(options.timeout_ms); + let ca_cert_pem_path = options + .tls + .ca_cert_pem_path + .as_deref() + .map(str::trim) + .filter(|path| !path.is_empty()); + let client_cert_pem_path = options + .tls + .client_cert_pem_path + .as_deref() + .map(str::trim) + .filter(|path| !path.is_empty()); + let client_key_pem_path = options + .tls + .client_key_pem_path + .as_deref() + .map(str::trim) + .filter(|path| !path.is_empty()); + if client_cert_pem_path.is_some() ^ client_key_pem_path.is_some() { + return Err(KiteError::InvalidQuery( + "OTLP mTLS requires both client_cert_pem_path and client_key_pem_path".into(), + )); + } + let custom_tls_configured = + ca_cert_pem_path.is_some() || (client_cert_pem_path.is_some() && client_key_pem_path.is_some()); + if custom_tls_configured && !endpoint_uses_https(endpoint) { + return Err(KiteError::InvalidQuery( + "OTLP custom TLS/mTLS configuration requires an https endpoint".into(), + )); + } + + let mut 
endpoint_builder = TonicEndpoint::from_shared(endpoint.to_string()) + .map_err(|error| { + KiteError::InvalidQuery(format!("Invalid OTLP gRPC endpoint: {error}").into()) + })? + .connect_timeout(timeout) + .timeout(timeout); + + if endpoint_uses_https(endpoint) || custom_tls_configured { + let mut tls = ClientTlsConfig::new(); + if let Some(path) = ca_cert_pem_path { + let pem = load_pem_bytes(path, "ca_cert_pem_path")?; + tls = tls.ca_certificate(TonicCertificate::from_pem(pem)); + } + if let (Some(cert_path), Some(key_path)) = (client_cert_pem_path, client_key_pem_path) { + let cert_pem = load_pem_bytes(cert_path, "client_cert_pem_path")?; + let key_pem = load_pem_bytes(key_path, "client_key_pem_path")?; + tls = tls.identity(TonicIdentity::from_pem(cert_pem, key_pem)); + } + endpoint_builder = endpoint_builder.tls_config(tls).map_err(|error| { + KiteError::InvalidQuery(format!("Invalid OTLP gRPC TLS configuration: {error}").into()) + })?; + } + + let bearer_token = options + .bearer_token + .as_deref() + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToOwned::to_owned); + + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .map_err(|error| { + KiteError::Internal(format!("Failed to initialize OTLP gRPC runtime: {error}")) + })?; + + runtime.block_on(async move { + for attempt in 1..=options.retry_max_attempts { + let channel = match endpoint_builder.clone().connect().await { + Ok(channel) => channel, + Err(error) => { + let transport_error = KiteError::Io(std::io::Error::other(format!( + "OTLP collector gRPC transport error: {error}" + ))); + if attempt < options.retry_max_attempts { + tokio::time::sleep(retry_backoff_with_jitter_duration( + endpoint, options, attempt, + )) + .await; + continue; + } + record_circuit_breaker_failure(endpoint, options); + return Err(transport_error); + } + }; + + let mut client = OtelMetricsServiceClient::new(channel); + if options.compression_gzip { + client = client + 
.send_compressed(TonicCompressionEncoding::Gzip) + .accept_compressed(TonicCompressionEncoding::Gzip); + } + + let mut request = tonic::Request::new(request_payload.clone()); + if let Some(token) = bearer_token.as_deref() { + let header_value = MetadataValue::try_from(format!("Bearer {token}")).map_err(|error| { + KiteError::InvalidQuery( + format!("Invalid OTLP bearer token for gRPC metadata: {error}").into(), + ) + })?; + request.metadata_mut().insert("authorization", header_value); + } + + match client.export(request).await { + Ok(response) => { + let body = response.into_inner(); + let response_body = match body.partial_success { + Some(partial) => format!( + "partial_success rejected_data_points={} error_message={}", + partial.rejected_data_points, partial.error_message + ), + None => String::new(), + }; + record_circuit_breaker_success(endpoint, options); + return Ok(OtlpHttpExportResult { + status_code: 200, + response_body, + }); + } + Err(status) => { + if attempt < options.retry_max_attempts && should_retry_grpc_status(status.code()) { + tokio::time::sleep(retry_backoff_with_jitter_duration( + endpoint, options, attempt, + )) + .await; + continue; + } + record_circuit_breaker_failure(endpoint, options); + return Err(KiteError::Internal(format!( + "OTLP collector rejected replication metrics over gRPC: {status}" + ))); + } + } + } + + record_circuit_breaker_failure(endpoint, options); + Err(KiteError::Internal( + "OTLP gRPC exporter exhausted retry attempts".to_string(), + )) + }) +} + +fn push_replication_metrics_otel_http_payload_with_options( + payload: &[u8], + endpoint: &str, + options: &OtlpHttpPushOptions, + content_type: &str, +) -> Result { + let endpoint = endpoint.trim(); + if endpoint.is_empty() { + return Err(KiteError::InvalidQuery( + "OTLP endpoint must not be empty".into(), + )); + } + validate_otel_push_options(options)?; + if options.tls.https_only && !endpoint_uses_https(endpoint) { + return Err(KiteError::InvalidQuery( + "OTLP endpoint 
must use https when https_only is enabled".into(), + )); + } + check_circuit_breaker_open(endpoint, options)?; + + let request_payload = encode_http_request_payload(payload, options.compression_gzip)?; + for attempt in 1..=options.retry_max_attempts { + let timeout = Duration::from_millis(options.timeout_ms); + let agent = build_otel_http_agent(endpoint, options, timeout)?; + let mut request = agent + .post(endpoint) + .set("content-type", content_type) + .timeout(timeout); + if options.compression_gzip { + request = request.set("content-encoding", "gzip"); + } + if let Some(token) = options.bearer_token.as_deref() { + if !token.trim().is_empty() { + request = request.set("authorization", &format!("Bearer {token}")); + } + } + + match request.send_bytes(&request_payload) { + Ok(response) => { + let status_code = response.status() as i64; + let response_body = response.into_string().unwrap_or_default(); + record_circuit_breaker_success(endpoint, options); + return Ok(OtlpHttpExportResult { + status_code, + response_body, + }); + } + Err(ureq::Error::Status(status_code, response)) => { + let body = response.into_string().unwrap_or_default(); + if attempt < options.retry_max_attempts && should_retry_http_status(status_code) { + thread::sleep(retry_backoff_with_jitter_duration( + endpoint, options, attempt, + )); + continue; + } + record_circuit_breaker_failure(endpoint, options); + return Err(KiteError::Internal(format!( + "OTLP collector rejected replication metrics: status {status_code}, body: {body}" + ))); + } + Err(ureq::Error::Transport(error)) => { + if attempt < options.retry_max_attempts { + thread::sleep(retry_backoff_with_jitter_duration( + endpoint, options, attempt, + )); + continue; + } + record_circuit_breaker_failure(endpoint, options); + return Err(KiteError::Io(std::io::Error::other(format!( + "OTLP collector transport error: {error}" + )))); + } + } + } + + Err(KiteError::Internal( + "OTLP exporter exhausted retry attempts".to_string(), + )) +} + 
+fn validate_otel_push_options(options: &OtlpHttpPushOptions) -> Result<()> { + if options.timeout_ms == 0 { + return Err(KiteError::InvalidQuery("timeout_ms must be > 0".into())); + } + if options.retry_max_attempts == 0 { + return Err(KiteError::InvalidQuery( + "retry_max_attempts must be > 0".into(), + )); + } + if !(0.0..=1.0).contains(&options.retry_jitter_ratio) { + return Err(KiteError::InvalidQuery( + "retry_jitter_ratio must be within [0.0, 1.0]".into(), + )); + } + if !(0.0..=1.0).contains(&options.adaptive_retry_ewma_alpha) { + return Err(KiteError::InvalidQuery( + "adaptive_retry_ewma_alpha must be within [0.0, 1.0]".into(), + )); + } + if options.circuit_breaker_failure_threshold > 0 && options.circuit_breaker_open_ms == 0 { + return Err(KiteError::InvalidQuery( + "circuit_breaker_open_ms must be > 0 when circuit_breaker_failure_threshold is enabled" + .into(), + )); + } + if options.circuit_breaker_failure_threshold > 0 && options.circuit_breaker_half_open_probes == 0 + { + return Err(KiteError::InvalidQuery( + "circuit_breaker_half_open_probes must be > 0 when circuit_breaker_failure_threshold is enabled" + .into(), + )); + } + if let Some(path) = options.circuit_breaker_state_path.as_deref() { + if path.trim().is_empty() { + return Err(KiteError::InvalidQuery( + "circuit_breaker_state_path must not be empty when provided".into(), + )); + } + } + if let Some(url) = options.circuit_breaker_state_url.as_deref() { + let trimmed = url.trim(); + if trimmed.is_empty() { + return Err(KiteError::InvalidQuery( + "circuit_breaker_state_url must not be empty when provided".into(), + )); + } + if !(trimmed.starts_with("http://") || trimmed.starts_with("https://")) { + return Err(KiteError::InvalidQuery( + "circuit_breaker_state_url must use http:// or https://".into(), + )); + } + if options.tls.https_only && !endpoint_uses_https(trimmed) { + return Err(KiteError::InvalidQuery( + "circuit_breaker_state_url must use https when https_only is enabled".into(), + )); 
+ } + } + if options.circuit_breaker_state_path.is_some() && options.circuit_breaker_state_url.is_some() { + return Err(KiteError::InvalidQuery( + "circuit_breaker_state_path and circuit_breaker_state_url are mutually exclusive".into(), + )); + } + if options.circuit_breaker_state_patch && options.circuit_breaker_state_url.is_none() { + return Err(KiteError::InvalidQuery( + "circuit_breaker_state_patch requires circuit_breaker_state_url".into(), + )); + } + if options.circuit_breaker_state_patch_batch && !options.circuit_breaker_state_patch { + return Err(KiteError::InvalidQuery( + "circuit_breaker_state_patch_batch requires circuit_breaker_state_patch".into(), + )); + } + if options.circuit_breaker_state_patch_merge && !options.circuit_breaker_state_patch { + return Err(KiteError::InvalidQuery( + "circuit_breaker_state_patch_merge requires circuit_breaker_state_patch".into(), + )); + } + if options.circuit_breaker_state_patch_batch_max_keys == 0 { + return Err(KiteError::InvalidQuery( + "circuit_breaker_state_patch_batch_max_keys must be > 0".into(), + )); + } + if options.circuit_breaker_state_patch_merge_max_keys == 0 { + return Err(KiteError::InvalidQuery( + "circuit_breaker_state_patch_merge_max_keys must be > 0".into(), + )); + } + if options.circuit_breaker_state_patch_retry_max_attempts == 0 { + return Err(KiteError::InvalidQuery( + "circuit_breaker_state_patch_retry_max_attempts must be > 0".into(), + )); + } + if options.circuit_breaker_state_cas && options.circuit_breaker_state_url.is_none() { + return Err(KiteError::InvalidQuery( + "circuit_breaker_state_cas requires circuit_breaker_state_url".into(), + )); + } + if let Some(lease_id) = options.circuit_breaker_state_lease_id.as_deref() { + if lease_id.trim().is_empty() { + return Err(KiteError::InvalidQuery( + "circuit_breaker_state_lease_id must not be empty when provided".into(), + )); + } + if options.circuit_breaker_state_url.is_none() { + return Err(KiteError::InvalidQuery( + 
"circuit_breaker_state_lease_id requires circuit_breaker_state_url".into(), + )); + } + } + if let Some(scope_key) = options.circuit_breaker_scope_key.as_deref() { + if scope_key.trim().is_empty() { + return Err(KiteError::InvalidQuery( + "circuit_breaker_scope_key must not be empty when provided".into(), + )); + } + } + Ok(()) +} + +fn should_retry_http_status(status_code: u16) -> bool { + status_code == 429 || status_code >= 500 +} + +fn should_retry_grpc_status(code: TonicCode) -> bool { + matches!( + code, + TonicCode::Unavailable | TonicCode::DeadlineExceeded | TonicCode::ResourceExhausted + ) +} + +fn retry_backoff_duration(options: &OtlpHttpPushOptions, attempt: u32) -> Duration { + if attempt <= 1 || options.retry_backoff_ms == 0 { + return Duration::from_millis(options.retry_backoff_ms); + } + let shift = (attempt - 1).min(31); + let multiplier = 1u64.checked_shl(shift).unwrap_or(u64::MAX); + let raw = options.retry_backoff_ms.saturating_mul(multiplier); + let backoff = if options.retry_backoff_max_ms == 0 { + raw + } else { + raw.min(options.retry_backoff_max_ms) + }; + Duration::from_millis(backoff) +} + +fn retry_backoff_with_jitter_duration( + endpoint: &str, + options: &OtlpHttpPushOptions, + attempt: u32, +) -> Duration { + let multiplier = adaptive_retry_multiplier(endpoint, options); + let base = retry_backoff_duration(options, attempt); + let mut base_ms = base.as_millis() as u64; + if multiplier > 1 { + base_ms = base_ms.saturating_mul(multiplier); + if options.retry_backoff_max_ms > 0 { + base_ms = base_ms.min(options.retry_backoff_max_ms); + } + } + if options.retry_jitter_ratio <= 0.0 { + return Duration::from_millis(base_ms); + } + if base_ms == 0 { + return Duration::from_millis(base_ms); + } + let jitter_max = ((base_ms as f64) * options.retry_jitter_ratio) as u64; + if jitter_max == 0 { + return Duration::from_millis(base_ms); + } + let jitter = rand::thread_rng().gen_range(0..=jitter_max); + 
Duration::from_millis(base_ms.saturating_add(jitter)) +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[serde(default)] +struct OtlpCircuitBreakerState { + consecutive_failures: u32, + open_until_ms: u64, + half_open_remaining_probes: u32, + half_open_in_flight: bool, + ewma_error_score: f64, +} + +static OTLP_CIRCUIT_BREAKERS: OnceLock>> = + OnceLock::new(); +static OTLP_CIRCUIT_BREAKER_STATE_URL_ETAGS: OnceLock>> = + OnceLock::new(); + +fn otlp_circuit_breakers() -> &'static Mutex> { + OTLP_CIRCUIT_BREAKERS.get_or_init(|| Mutex::new(HashMap::new())) +} + +fn otlp_circuit_breaker_state_url_etags() -> &'static Mutex> { + OTLP_CIRCUIT_BREAKER_STATE_URL_ETAGS.get_or_init(|| Mutex::new(HashMap::new())) +} + +fn circuit_breaker_now_ms() -> u64 { + SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64 +} + +fn circuit_breaker_key(endpoint: &str, options: &OtlpHttpPushOptions) -> String { + options + .circuit_breaker_scope_key + .as_deref() + .map(str::trim) + .filter(|value| !value.is_empty()) + .unwrap_or(endpoint) + .to_string() +} + +fn circuit_breaker_state_path(options: &OtlpHttpPushOptions) -> Option<&str> { + options + .circuit_breaker_state_path + .as_deref() + .map(str::trim) + .filter(|value| !value.is_empty()) +} + +fn circuit_breaker_state_url(options: &OtlpHttpPushOptions) -> Option<&str> { + options + .circuit_breaker_state_url + .as_deref() + .map(str::trim) + .filter(|value| !value.is_empty()) +} + +fn circuit_breaker_state_url_etag_key(url: &str, scope: &str, key: Option<&str>) -> String { + match key { + Some(value) => format!("{url}::{scope}::{value}"), + None => format!("{url}::{scope}"), + } +} + +fn load_persisted_breakers_from_path(path: &str) -> HashMap { + let raw = match fs::read(path) { + Ok(bytes) => bytes, + Err(_) => return HashMap::new(), + }; + serde_json::from_slice::>(&raw).unwrap_or_default() +} + +fn load_persisted_breakers_from_url( + url: &str, + options: 
&OtlpHttpPushOptions, +) -> HashMap { + let timeout = Duration::from_millis(options.timeout_ms.max(1)); + let agent = match build_otel_http_agent(url, options, timeout) { + Ok(agent) => agent, + Err(_) => return HashMap::new(), + }; + let mut request = agent.get(url).timeout(timeout); + if let Some(lease_id) = options.circuit_breaker_state_lease_id.as_deref() { + request = request.set("x-kitedb-breaker-lease", lease_id); + } + let response = match request.call() { + Ok(response) => response, + Err(_) => return HashMap::new(), + }; + if options.circuit_breaker_state_cas { + if let Some(etag) = response.header("etag") { + otlp_circuit_breaker_state_url_etags().lock().insert( + circuit_breaker_state_url_etag_key(url, "doc", None), + etag.to_string(), + ); + } + } + let body = response.into_string().unwrap_or_default(); + serde_json::from_str::>(&body).unwrap_or_default() +} + +fn load_persisted_breaker_from_url_patch( + url: &str, + key: &str, + options: &OtlpHttpPushOptions, +) -> Option { + let timeout = Duration::from_millis(options.timeout_ms.max(1)); + let agent = match build_otel_http_agent(url, options, timeout) { + Ok(agent) => agent, + Err(_) => return None, + }; + let mut request = agent + .get(url) + .set("x-kitedb-breaker-mode", "patch-v1") + .set("x-kitedb-breaker-key", key) + .timeout(timeout); + if let Some(lease_id) = options.circuit_breaker_state_lease_id.as_deref() { + request = request.set("x-kitedb-breaker-lease", lease_id); + } + let response = match request.call() { + Ok(response) => response, + Err(ureq::Error::Status(404, _)) => return None, + Err(_) => return None, + }; + if options.circuit_breaker_state_cas { + if let Some(etag) = response.header("etag") { + let mut etags = otlp_circuit_breaker_state_url_etags().lock(); + etags.insert( + circuit_breaker_state_url_etag_key(url, "patch", Some(key)), + etag.to_string(), + ); + if options.circuit_breaker_state_patch_batch { + etags.insert( + circuit_breaker_state_url_etag_key(url, "batch", None), 
+ etag.to_string(), + ); + } + if options.circuit_breaker_state_patch_merge { + etags.insert( + circuit_breaker_state_url_etag_key(url, "merge", None), + etag.to_string(), + ); + } + } + } + let body = response.into_string().unwrap_or_default(); + if body.trim().is_empty() { + return None; + } + if let Ok(state) = serde_json::from_str::(&body) { + return Some(state); + } + let wrapper = serde_json::from_str::(&body).ok()?; + let state = wrapper.get("state")?; + serde_json::from_value::(state.clone()).ok() +} + +fn load_persisted_breaker_state( + key: &str, + options: &OtlpHttpPushOptions, +) -> Option { + if let Some(path) = circuit_breaker_state_path(options) { + return load_persisted_breakers_from_path(path).get(key).cloned(); + } + if let Some(url) = circuit_breaker_state_url(options) { + if options.circuit_breaker_state_patch { + return load_persisted_breaker_from_url_patch(url, key, options); + } + return load_persisted_breakers_from_url(url, options) + .get(key) + .cloned(); + } + None +} + +fn persist_breakers_to_path(path: &str, states: &HashMap) { + let Ok(serialized) = serde_json::to_vec(states) else { + return; + }; + let _ = fs::write(path, serialized); +} + +fn persist_breakers_to_url( + url: &str, + options: &OtlpHttpPushOptions, + states: &HashMap, +) { + let Ok(serialized) = serde_json::to_vec(states) else { + return; + }; + let timeout = Duration::from_millis(options.timeout_ms.max(1)); + let Ok(agent) = build_otel_http_agent(url, options, timeout) else { + return; + }; + let mut request = agent + .put(url) + .set("content-type", "application/json") + .timeout(timeout); + if options.circuit_breaker_state_cas { + if let Some(etag) = otlp_circuit_breaker_state_url_etags() + .lock() + .get(&circuit_breaker_state_url_etag_key(url, "doc", None)) + .cloned() + { + request = request.set("if-match", &etag); + } else { + request = request.set("if-match", "*"); + } + } + if let Some(lease_id) = options.circuit_breaker_state_lease_id.as_deref() { + request = 
request.set("x-kitedb-breaker-lease", lease_id); + } + match request.send_bytes(&serialized) { + Ok(response) => { + if options.circuit_breaker_state_cas { + if let Some(etag) = response.header("etag") { + otlp_circuit_breaker_state_url_etags().lock().insert( + circuit_breaker_state_url_etag_key(url, "doc", None), + etag.to_string(), + ); + } + } + } + Err(ureq::Error::Status(status, response)) => { + if options.circuit_breaker_state_cas && (status == 409 || status == 412) { + if let Some(etag) = response.header("etag") { + otlp_circuit_breaker_state_url_etags().lock().insert( + circuit_breaker_state_url_etag_key(url, "doc", None), + etag.to_string(), + ); + } + } + } + Err(_) => {} + } +} + +fn persist_breaker_to_url_patch( + url: &str, + key: &str, + state: Option<&OtlpCircuitBreakerState>, + options: &OtlpHttpPushOptions, +) { + let payload = json!({ + "key": key, + "state": state, + }); + let Ok(serialized) = serde_json::to_vec(&payload) else { + return; + }; + let attempts = options + .circuit_breaker_state_patch_retry_max_attempts + .max(1); + for attempt in 1..=attempts { + let timeout = Duration::from_millis(options.timeout_ms.max(1)); + let Ok(agent) = build_otel_http_agent(url, options, timeout) else { + return; + }; + let mut request = agent + .request("PATCH", url) + .set("content-type", "application/json") + .set("x-kitedb-breaker-mode", "patch-v1") + .set("x-kitedb-breaker-key", key) + .timeout(timeout); + if options.circuit_breaker_state_cas { + if let Some(etag) = otlp_circuit_breaker_state_url_etags() + .lock() + .get(&circuit_breaker_state_url_etag_key(url, "patch", Some(key))) + .cloned() + { + request = request.set("if-match", &etag); + } else { + request = request.set("if-match", "*"); + } + } + if let Some(lease_id) = options.circuit_breaker_state_lease_id.as_deref() { + request = request.set("x-kitedb-breaker-lease", lease_id); + } + match request.send_bytes(&serialized) { + Ok(response) => { + if options.circuit_breaker_state_cas { + if let 
Some(etag) = response.header("etag") { + otlp_circuit_breaker_state_url_etags().lock().insert( + circuit_breaker_state_url_etag_key(url, "patch", Some(key)), + etag.to_string(), + ); + } + } + return; + } + Err(ureq::Error::Status(status, response)) => { + if options.circuit_breaker_state_cas && (status == 409 || status == 412) { + if let Some(etag) = response.header("etag") { + otlp_circuit_breaker_state_url_etags().lock().insert( + circuit_breaker_state_url_etag_key(url, "patch", Some(key)), + etag.to_string(), + ); + } + if attempt < attempts { + continue; + } + } + return; + } + Err(_) => return, + } + } +} + +fn persist_breakers_to_url_patch_batch( + url: &str, + primary_key: &str, + states: &HashMap, + options: &OtlpHttpPushOptions, +) { + let mut updates = Vec::new(); + let max_keys = + usize::try_from(options.circuit_breaker_state_patch_batch_max_keys).unwrap_or(usize::MAX); + if let Some(state) = states.get(primary_key) { + updates.push(json!({ "key": primary_key, "state": state })); + } else { + updates.push(json!({ "key": primary_key, "state": Value::Null })); + } + if max_keys > 1 { + for (key, state) in states { + if key == primary_key { + continue; + } + updates.push(json!({ "key": key, "state": state })); + if updates.len() >= max_keys { + break; + } + } + } + let payload = json!({ "updates": updates }); + let Ok(serialized) = serde_json::to_vec(&payload) else { + return; + }; + + let attempts = options + .circuit_breaker_state_patch_retry_max_attempts + .max(1); + for attempt in 1..=attempts { + let timeout = Duration::from_millis(options.timeout_ms.max(1)); + let Ok(agent) = build_otel_http_agent(url, options, timeout) else { + return; + }; + let mut request = agent + .request("PATCH", url) + .set("content-type", "application/json") + .set("x-kitedb-breaker-mode", "patch-batch-v1") + .set("x-kitedb-breaker-key", primary_key) + .timeout(timeout); + if options.circuit_breaker_state_cas { + if let Some(etag) = otlp_circuit_breaker_state_url_etags() + 
.lock() + .get(&circuit_breaker_state_url_etag_key(url, "batch", None)) + .cloned() + { + request = request.set("if-match", &etag); + } else { + request = request.set("if-match", "*"); + } + } + if let Some(lease_id) = options.circuit_breaker_state_lease_id.as_deref() { + request = request.set("x-kitedb-breaker-lease", lease_id); + } + match request.send_bytes(&serialized) { + Ok(response) => { + if options.circuit_breaker_state_cas { + if let Some(etag) = response.header("etag") { + otlp_circuit_breaker_state_url_etags().lock().insert( + circuit_breaker_state_url_etag_key(url, "batch", None), + etag.to_string(), + ); + } + } + return; + } + Err(ureq::Error::Status(status, response)) => { + if options.circuit_breaker_state_cas && (status == 409 || status == 412) { + if let Some(etag) = response.header("etag") { + otlp_circuit_breaker_state_url_etags().lock().insert( + circuit_breaker_state_url_etag_key(url, "batch", None), + etag.to_string(), + ); + } + if attempt < attempts { + continue; + } + } + return; + } + Err(_) => return, + } + } +} + +fn persist_breakers_to_url_patch_merge( + url: &str, + primary_key: &str, + states: &HashMap, + options: &OtlpHttpPushOptions, +) { + let mut updates = Vec::new(); + let max_keys = + usize::try_from(options.circuit_breaker_state_patch_merge_max_keys).unwrap_or(usize::MAX); + if let Some(state) = states.get(primary_key) { + updates.push(json!({ "key": primary_key, "state": state })); + } else { + updates.push(json!({ "key": primary_key, "state": Value::Null })); + } + if max_keys > 1 { + for (key, state) in states { + if key == primary_key { + continue; + } + updates.push(json!({ "key": key, "state": state })); + if updates.len() >= max_keys { + break; + } + } + } + let total_keys = states + .len() + .saturating_add(usize::from(!states.contains_key(primary_key))); + let payload = json!({ + "scope_key": primary_key, + "total_keys": total_keys, + "truncated": total_keys > updates.len(), + "updates": updates, + }); + let 
Ok(serialized) = serde_json::to_vec(&payload) else { + return; + }; + + let attempts = options + .circuit_breaker_state_patch_retry_max_attempts + .max(1); + for attempt in 1..=attempts { + let timeout = Duration::from_millis(options.timeout_ms.max(1)); + let Ok(agent) = build_otel_http_agent(url, options, timeout) else { + return; + }; + let mut request = agent + .request("PATCH", url) + .set("content-type", "application/json") + .set("x-kitedb-breaker-mode", "patch-merge-v1") + .set("x-kitedb-breaker-key", primary_key) + .timeout(timeout); + if options.circuit_breaker_state_cas { + if let Some(etag) = otlp_circuit_breaker_state_url_etags() + .lock() + .get(&circuit_breaker_state_url_etag_key(url, "merge", None)) + .cloned() + { + request = request.set("if-match", &etag); + } else { + request = request.set("if-match", "*"); + } + } + if let Some(lease_id) = options.circuit_breaker_state_lease_id.as_deref() { + request = request.set("x-kitedb-breaker-lease", lease_id); + } + match request.send_bytes(&serialized) { + Ok(response) => { + if options.circuit_breaker_state_cas { + if let Some(etag) = response.header("etag") { + otlp_circuit_breaker_state_url_etags().lock().insert( + circuit_breaker_state_url_etag_key(url, "merge", None), + etag.to_string(), + ); + } + } + return; + } + Err(ureq::Error::Status(status, response)) => { + if options.circuit_breaker_state_cas && (status == 409 || status == 412) { + if let Some(etag) = response.header("etag") { + otlp_circuit_breaker_state_url_etags().lock().insert( + circuit_breaker_state_url_etag_key(url, "merge", None), + etag.to_string(), + ); + } + if attempt < attempts { + continue; + } + } + return; + } + Err(_) => return, + } + } +} + +fn persist_breakers( + options: &OtlpHttpPushOptions, + key: &str, + states: &HashMap, +) { + if let Some(path) = circuit_breaker_state_path(options) { + persist_breakers_to_path(path, states); + } else if let Some(url) = circuit_breaker_state_url(options) { + if 
options.circuit_breaker_state_patch { + if options.circuit_breaker_state_patch_merge { + persist_breakers_to_url_patch_merge(url, key, states, options); + } else if options.circuit_breaker_state_patch_batch { + persist_breakers_to_url_patch_batch(url, key, states, options); + } else { + persist_breaker_to_url_patch(url, key, states.get(key), options); + } + } else { + persist_breakers_to_url(url, options, states); + } + } +} + +fn merge_persisted_breaker_state( + key: &str, + persisted_state: Option, + states: &mut HashMap, +) { + let Some(persisted_state) = persisted_state else { + return; + }; + let entry = states.entry(key.to_string()).or_default(); + entry.consecutive_failures = entry + .consecutive_failures + .max(persisted_state.consecutive_failures); + entry.open_until_ms = entry.open_until_ms.max(persisted_state.open_until_ms); + entry.half_open_remaining_probes = entry + .half_open_remaining_probes + .max(persisted_state.half_open_remaining_probes); + entry.ewma_error_score = entry + .ewma_error_score + .max(persisted_state.ewma_error_score) + .clamp(0.0, 1.0); +} + +fn adaptive_retry_multiplier(endpoint: &str, options: &OtlpHttpPushOptions) -> u64 { + if !options.adaptive_retry { + return 1; + } + let key = circuit_breaker_key(endpoint, options); + let persisted_state = load_persisted_breaker_state(&key, options); + let mut states = otlp_circuit_breakers().lock(); + merge_persisted_breaker_state(&key, persisted_state, &mut states); + let multiplier = states + .get(&key) + .map(|state| match options.adaptive_retry_mode { + OtlpAdaptiveRetryMode::Linear => 1 + u64::from(state.consecutive_failures.min(8)), + OtlpAdaptiveRetryMode::Ewma => { + let score = state.ewma_error_score.clamp(0.0, 1.0); + 1 + ((score * 8.0).round() as u64) + } + }) + .unwrap_or(1); + multiplier.max(1) +} + +fn check_circuit_breaker_open(endpoint: &str, options: &OtlpHttpPushOptions) -> Result<()> { + if options.circuit_breaker_failure_threshold == 0 { + return Ok(()); + } + let key = 
circuit_breaker_key(endpoint, options); + let now = circuit_breaker_now_ms(); + let persisted_state = load_persisted_breaker_state(&key, options); + let snapshot = { + let mut states = otlp_circuit_breakers().lock(); + merge_persisted_breaker_state(&key, persisted_state, &mut states); + let Some(state) = states.get_mut(&key) else { + return Ok(()); + }; + if state.open_until_ms > now { + return Err(KiteError::Internal(format!( + "OTLP circuit breaker open for endpoint {endpoint} until {}", + state.open_until_ms + ))); + } + + let mut changed = false; + if state.open_until_ms > 0 { + state.open_until_ms = 0; + if state.half_open_remaining_probes == 0 && !state.half_open_in_flight { + state.half_open_remaining_probes = options.circuit_breaker_half_open_probes.max(1); + } + changed = true; + } + + if state.half_open_in_flight { + return Err(KiteError::Internal(format!( + "OTLP circuit breaker half-open probe already in flight for endpoint {endpoint}" + ))); + } + + if state.half_open_remaining_probes > 0 { + state.half_open_remaining_probes = state.half_open_remaining_probes.saturating_sub(1); + state.half_open_in_flight = true; + changed = true; + } + + if changed { + Some(states.clone()) + } else { + None + } + }; + if let Some(snapshot) = snapshot { + persist_breakers(options, &key, &snapshot); + } + Ok(()) +} + +fn record_circuit_breaker_success(endpoint: &str, options: &OtlpHttpPushOptions) { + if options.circuit_breaker_failure_threshold == 0 && !options.adaptive_retry { + return; + } + let key = circuit_breaker_key(endpoint, options); + let persisted_state = load_persisted_breaker_state(&key, options); + let snapshot = { + let mut states = otlp_circuit_breakers().lock(); + merge_persisted_breaker_state(&key, persisted_state, &mut states); + let state = states.entry(key.clone()).or_default(); + let alpha = options.adaptive_retry_ewma_alpha.clamp(0.0, 1.0); + state.ewma_error_score = ((1.0 - alpha) * state.ewma_error_score).clamp(0.0, 1.0); + 
state.consecutive_failures = 0; + state.open_until_ms = 0; + state.half_open_in_flight = false; + if !options.adaptive_retry + && state.consecutive_failures == 0 + && state.open_until_ms == 0 + && state.half_open_remaining_probes == 0 + && !state.half_open_in_flight + { + states.remove(&key); + } else if options.adaptive_retry + && state.consecutive_failures == 0 + && state.open_until_ms == 0 + && state.half_open_remaining_probes == 0 + && !state.half_open_in_flight + && state.ewma_error_score <= f64::EPSILON + { + states.remove(&key); + } + states.clone() + }; + persist_breakers(options, &key, &snapshot); +} + +fn record_circuit_breaker_failure(endpoint: &str, options: &OtlpHttpPushOptions) { + if (options.circuit_breaker_failure_threshold == 0 || options.circuit_breaker_open_ms == 0) + && !options.adaptive_retry + { + return; + } + let key = circuit_breaker_key(endpoint, options); + let now = circuit_breaker_now_ms(); + let persisted_state = load_persisted_breaker_state(&key, options); + let snapshot = { + let mut states = otlp_circuit_breakers().lock(); + merge_persisted_breaker_state(&key, persisted_state, &mut states); + let state = states.entry(key.clone()).or_default(); + let alpha = options.adaptive_retry_ewma_alpha.clamp(0.0, 1.0); + state.ewma_error_score = ((1.0 - alpha) * state.ewma_error_score + alpha).clamp(0.0, 1.0); + if options.circuit_breaker_failure_threshold > 0 && options.circuit_breaker_open_ms > 0 { + let probe_budget = options.circuit_breaker_half_open_probes.max(1); + if state.half_open_in_flight || state.half_open_remaining_probes > 0 { + state.open_until_ms = now.saturating_add(options.circuit_breaker_open_ms); + state.consecutive_failures = 0; + state.half_open_remaining_probes = probe_budget; + state.half_open_in_flight = false; + } else { + state.consecutive_failures = state.consecutive_failures.saturating_add(1); + if state.consecutive_failures >= options.circuit_breaker_failure_threshold { + state.open_until_ms = 
now.saturating_add(options.circuit_breaker_open_ms); + state.consecutive_failures = 0; + state.half_open_remaining_probes = probe_budget; + state.half_open_in_flight = false; + } + } + } + states.clone() + }; + persist_breakers(options, &key, &snapshot); +} + +fn encode_http_request_payload(payload: &[u8], compression_gzip: bool) -> Result> { + if !compression_gzip { + return Ok(payload.to_vec()); + } + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + encoder.write_all(payload).map_err(|error| { + KiteError::Internal(format!( + "Failed compressing OTLP payload with gzip: {error}" + )) + })?; + encoder.finish().map_err(|error| { + KiteError::Internal(format!( + "Failed finalizing compressed OTLP payload: {error}" + )) + }) +} + +fn endpoint_uses_https(endpoint: &str) -> bool { + endpoint.to_ascii_lowercase().starts_with("https://") +} + +fn build_otel_http_agent( + endpoint: &str, + options: &OtlpHttpPushOptions, + timeout: Duration, +) -> Result { + let ca_cert_pem_path = options + .tls + .ca_cert_pem_path + .as_deref() + .map(str::trim) + .filter(|path| !path.is_empty()); + let client_cert_pem_path = options + .tls + .client_cert_pem_path + .as_deref() + .map(str::trim) + .filter(|path| !path.is_empty()); + let client_key_pem_path = options + .tls + .client_key_pem_path + .as_deref() + .map(str::trim) + .filter(|path| !path.is_empty()); + + if client_cert_pem_path.is_some() ^ client_key_pem_path.is_some() { + return Err(KiteError::InvalidQuery( + "OTLP mTLS requires both client_cert_pem_path and client_key_pem_path".into(), + )); + } + + let custom_tls_configured = + ca_cert_pem_path.is_some() || (client_cert_pem_path.is_some() && client_key_pem_path.is_some()); + if custom_tls_configured && !endpoint_uses_https(endpoint) { + return Err(KiteError::InvalidQuery( + "OTLP custom TLS/mTLS configuration requires an https endpoint".into(), + )); + } + + let mut builder = ureq::builder() + .https_only(options.tls.https_only) + 
.timeout_connect(timeout) + .timeout_read(timeout) + .timeout_write(timeout); + + if custom_tls_configured { + let mut root_store = ureq::rustls::RootCertStore::empty(); + root_store.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned()); + + if let Some(path) = ca_cert_pem_path { + let certs = load_certificates_from_pem(path, "ca_cert_pem_path")?; + let (valid_count, _) = root_store.add_parsable_certificates(certs); + if valid_count == 0 { + return Err(KiteError::InvalidQuery( + format!("No valid CA certificates found in ca_cert_pem_path: {path}").into(), + )); + } + } + + let client_config_builder = + ureq::rustls::ClientConfig::builder().with_root_certificates(root_store); + let client_config = + if let (Some(cert_path), Some(key_path)) = (client_cert_pem_path, client_key_pem_path) { + let certs = load_certificates_from_pem(cert_path, "client_cert_pem_path")?; + let key = load_private_key_from_pem(key_path, "client_key_pem_path")?; + client_config_builder + .with_client_auth_cert(certs, key) + .map_err(|error| { + KiteError::InvalidQuery( + format!("Invalid OTLP client certificate/key for mTLS: {error}").into(), + ) + })? 
+ } else { + client_config_builder.with_no_client_auth() + }; + + builder = builder.tls_config(Arc::new(client_config)); + } + + Ok(builder.build()) +} + +fn load_certificates_from_pem( + path: &str, + field_name: &str, +) -> Result>> { + let file = File::open(path).map_err(|error| { + KiteError::InvalidQuery(format!("Failed opening {field_name} '{path}': {error}").into()) + })?; + let mut reader = BufReader::new(file); + let certs = rustls_pemfile::certs(&mut reader) + .collect::, _>>() + .map_err(|error| { + KiteError::InvalidQuery( + format!("Failed parsing certificates from {field_name} '{path}': {error}").into(), + ) + })?; + if certs.is_empty() { + return Err(KiteError::InvalidQuery( + format!("No certificates found in {field_name} '{path}'").into(), + )); + } + Ok(certs) +} + +fn load_private_key_from_pem( + path: &str, + field_name: &str, +) -> Result> { + let file = File::open(path).map_err(|error| { + KiteError::InvalidQuery(format!("Failed opening {field_name} '{path}': {error}").into()) + })?; + let mut reader = BufReader::new(file); + rustls_pemfile::private_key(&mut reader) + .map_err(|error| { + KiteError::InvalidQuery( + format!("Failed parsing private key from {field_name} '{path}': {error}").into(), + ) + })? + .ok_or_else(|| { + KiteError::InvalidQuery(format!("No private key found in {field_name} '{path}'").into()) + }) +} + +fn load_pem_bytes(path: &str, field_name: &str) -> Result> { + let bytes = fs::read(path).map_err(|error| { + KiteError::InvalidQuery(format!("Failed reading {field_name} '{path}': {error}").into()) + })?; + if bytes.is_empty() { + return Err(KiteError::InvalidQuery( + format!("{field_name} '{path}' is empty").into(), + )); + } + Ok(bytes) +} + +/// Render replication metrics from a metrics snapshot using Prometheus exposition format. 
+pub fn render_replication_metrics_prometheus(metrics: &DatabaseMetrics) -> String { + let mut lines = Vec::new(); + let role = metrics.replication.role.as_str(); + let enabled = if metrics.replication.enabled { 1 } else { 0 }; + + push_prometheus_help( + &mut lines, + "kitedb_replication_enabled", + "gauge", + "Whether replication is enabled for this database (1 enabled, 0 disabled).", + ); + push_prometheus_sample( + &mut lines, + "kitedb_replication_enabled", + enabled, + &[("role", role)], + ); + + // Host-runtime export path is process-local and does not enforce HTTP auth. + push_prometheus_help( + &mut lines, + "kitedb_replication_auth_enabled", + "gauge", + "Whether replication admin auth is enabled for this metrics exporter.", + ); + push_prometheus_sample(&mut lines, "kitedb_replication_auth_enabled", 0, &[]); + + if let Some(primary) = metrics.replication.primary.as_ref() { + push_prometheus_help( + &mut lines, + "kitedb_replication_primary_epoch", + "gauge", + "Current primary replication epoch.", + ); + push_prometheus_sample( + &mut lines, + "kitedb_replication_primary_epoch", + primary.epoch, + &[], + ); + + push_prometheus_help( + &mut lines, + "kitedb_replication_primary_head_log_index", + "gauge", + "Current primary head log index.", + ); + push_prometheus_sample( + &mut lines, + "kitedb_replication_primary_head_log_index", + primary.head_log_index, + &[], + ); + + push_prometheus_help( + &mut lines, + "kitedb_replication_primary_retained_floor", + "gauge", + "Current primary retained floor log index.", + ); + push_prometheus_sample( + &mut lines, + "kitedb_replication_primary_retained_floor", + primary.retained_floor, + &[], + ); + + push_prometheus_help( + &mut lines, + "kitedb_replication_primary_replica_count", + "gauge", + "Replica progress reporters known by this primary.", + ); + push_prometheus_sample( + &mut lines, + "kitedb_replication_primary_replica_count", + primary.replica_count, + &[], + ); + + push_prometheus_help( + &mut lines, + 
"kitedb_replication_primary_stale_epoch_replica_count", + "gauge", + "Replica reporters currently on stale epochs.", + ); + push_prometheus_sample( + &mut lines, + "kitedb_replication_primary_stale_epoch_replica_count", + primary.stale_epoch_replica_count, + &[], + ); + + push_prometheus_help( + &mut lines, + "kitedb_replication_primary_max_replica_lag", + "gauge", + "Maximum reported lag (log frames) across replicas.", + ); + push_prometheus_sample( + &mut lines, + "kitedb_replication_primary_max_replica_lag", + primary.max_replica_lag, + &[], + ); + + push_prometheus_help( + &mut lines, + "kitedb_replication_primary_append_attempts_total", + "counter", + "Total replication append attempts on the primary commit path.", + ); + push_prometheus_sample( + &mut lines, + "kitedb_replication_primary_append_attempts_total", + primary.append_attempts, + &[], + ); + + push_prometheus_help( + &mut lines, + "kitedb_replication_primary_append_failures_total", + "counter", + "Total replication append failures on the primary commit path.", + ); + push_prometheus_sample( + &mut lines, + "kitedb_replication_primary_append_failures_total", + primary.append_failures, + &[], + ); + + push_prometheus_help( + &mut lines, + "kitedb_replication_primary_append_successes_total", + "counter", + "Total replication append successes on the primary commit path.", + ); + push_prometheus_sample( + &mut lines, + "kitedb_replication_primary_append_successes_total", + primary.append_successes, + &[], + ); + } + + if let Some(replica) = metrics.replication.replica.as_ref() { + push_prometheus_help( + &mut lines, + "kitedb_replication_replica_applied_epoch", + "gauge", + "Replica applied epoch.", + ); + push_prometheus_sample( + &mut lines, + "kitedb_replication_replica_applied_epoch", + replica.applied_epoch, + &[], + ); + + push_prometheus_help( + &mut lines, + "kitedb_replication_replica_applied_log_index", + "gauge", + "Replica applied log index.", + ); + push_prometheus_sample( + &mut lines, + 
"kitedb_replication_replica_applied_log_index", + replica.applied_log_index, + &[], + ); + + push_prometheus_help( + &mut lines, + "kitedb_replication_replica_needs_reseed", + "gauge", + "Whether replica currently requires snapshot reseed (1 yes, 0 no).", + ); + push_prometheus_sample( + &mut lines, + "kitedb_replication_replica_needs_reseed", + if replica.needs_reseed { 1 } else { 0 }, + &[], + ); + + push_prometheus_help( + &mut lines, + "kitedb_replication_replica_last_error_present", + "gauge", + "Whether replica currently has a non-empty last_error value (1 yes, 0 no).", + ); + push_prometheus_sample( + &mut lines, + "kitedb_replication_replica_last_error_present", + if replica.last_error.is_some() { 1 } else { 0 }, + &[], + ); + } + + let mut text = lines.join("\n"); + text.push('\n'); + text +} + +/// Render replication metrics in OpenTelemetry OTLP JSON format. +pub fn render_replication_metrics_otel_json(metrics: &DatabaseMetrics) -> String { + let role = metrics.replication.role.as_str(); + let enabled = if metrics.replication.enabled { 1 } else { 0 }; + let time_unix_nano = metric_time_unix_nano(metrics); + let mut otel_metrics: Vec = Vec::new(); + + otel_metrics.push(otel_gauge_metric( + "kitedb.replication.enabled", + "Whether replication is enabled for this database (1 enabled, 0 disabled).", + "1", + enabled, + &[("role", role)], + &time_unix_nano, + )); + + // Host-runtime export path is process-local and does not enforce HTTP auth. 
+ otel_metrics.push(otel_gauge_metric( + "kitedb.replication.auth.enabled", + "Whether replication admin auth is enabled for this metrics exporter.", + "1", + 0, + &[], + &time_unix_nano, + )); + + if let Some(primary) = metrics.replication.primary.as_ref() { + otel_metrics.push(otel_gauge_metric( + "kitedb.replication.primary.epoch", + "Current primary replication epoch.", + "1", + primary.epoch, + &[], + &time_unix_nano, + )); + otel_metrics.push(otel_gauge_metric( + "kitedb.replication.primary.head_log_index", + "Current primary head log index.", + "1", + primary.head_log_index, + &[], + &time_unix_nano, + )); + otel_metrics.push(otel_gauge_metric( + "kitedb.replication.primary.retained_floor", + "Current primary retained floor log index.", + "1", + primary.retained_floor, + &[], + &time_unix_nano, + )); + otel_metrics.push(otel_gauge_metric( + "kitedb.replication.primary.replica_count", + "Replica progress reporters known by this primary.", + "1", + primary.replica_count, + &[], + &time_unix_nano, + )); + otel_metrics.push(otel_gauge_metric( + "kitedb.replication.primary.stale_epoch_replica_count", + "Replica reporters currently on stale epochs.", + "1", + primary.stale_epoch_replica_count, + &[], + &time_unix_nano, + )); + otel_metrics.push(otel_gauge_metric( + "kitedb.replication.primary.max_replica_lag", + "Maximum reported lag (log frames) across replicas.", + "1", + primary.max_replica_lag, + &[], + &time_unix_nano, + )); + + otel_metrics.push(otel_sum_metric( + "kitedb.replication.primary.append_attempts", + "Total replication append attempts on the primary commit path.", + "1", + primary.append_attempts, + true, + &[], + &time_unix_nano, + )); + otel_metrics.push(otel_sum_metric( + "kitedb.replication.primary.append_failures", + "Total replication append failures on the primary commit path.", + "1", + primary.append_failures, + true, + &[], + &time_unix_nano, + )); + otel_metrics.push(otel_sum_metric( + "kitedb.replication.primary.append_successes", + 
"Total replication append successes on the primary commit path.", + "1", + primary.append_successes, + true, + &[], + &time_unix_nano, + )); + } + + if let Some(replica) = metrics.replication.replica.as_ref() { + otel_metrics.push(otel_gauge_metric( + "kitedb.replication.replica.applied_epoch", + "Replica applied epoch.", + "1", + replica.applied_epoch, + &[], + &time_unix_nano, + )); + otel_metrics.push(otel_gauge_metric( + "kitedb.replication.replica.applied_log_index", + "Replica applied log index.", + "1", + replica.applied_log_index, + &[], + &time_unix_nano, + )); + otel_metrics.push(otel_gauge_metric( + "kitedb.replication.replica.needs_reseed", + "Whether replica currently requires snapshot reseed (1 yes, 0 no).", + "1", + if replica.needs_reseed { 1 } else { 0 }, + &[], + &time_unix_nano, + )); + otel_metrics.push(otel_gauge_metric( + "kitedb.replication.replica.last_error_present", + "Whether replica currently has a non-empty last_error value (1 yes, 0 no).", + "1", + if replica.last_error.is_some() { 1 } else { 0 }, + &[], + &time_unix_nano, + )); + } + + let payload = json!({ + "resourceMetrics": [ + { + "resource": { + "attributes": [ + otel_attr_string("service.name", "kitedb"), + otel_attr_string("kitedb.database.path", metrics.path.as_str()), + otel_attr_string("kitedb.metrics.scope", "replication"), + ] + }, + "scopeMetrics": [ + { + "scope": { + "name": "kitedb.metrics.replication", + "version": env!("CARGO_PKG_VERSION"), + }, + "metrics": otel_metrics, + } + ] + } + ] + }); + + serde_json::to_string(&payload).unwrap_or_else(|_| "{\"resourceMetrics\":[]}".to_string()) +} + +/// Render replication metrics in OpenTelemetry OTLP protobuf wire format. 
+pub fn render_replication_metrics_otel_protobuf(metrics: &DatabaseMetrics) -> Vec { + let role = metrics.replication.role.as_str(); + let enabled = if metrics.replication.enabled { 1 } else { 0 }; + let time_unix_nano = metric_time_unix_nano_u64(metrics); + let mut otel_metrics: Vec = Vec::new(); + + otel_metrics.push(otel_proto_gauge_metric( + "kitedb.replication.enabled", + "Whether replication is enabled for this database (1 enabled, 0 disabled).", + "1", + enabled, + &[("role", role)], + time_unix_nano, + )); + + // Host-runtime export path is process-local and does not enforce HTTP auth. + otel_metrics.push(otel_proto_gauge_metric( + "kitedb.replication.auth.enabled", + "Whether replication admin auth is enabled for this metrics exporter.", + "1", + 0, + &[], + time_unix_nano, + )); + + if let Some(primary) = metrics.replication.primary.as_ref() { + otel_metrics.push(otel_proto_gauge_metric( + "kitedb.replication.primary.epoch", + "Current primary replication epoch.", + "1", + primary.epoch, + &[], + time_unix_nano, + )); + otel_metrics.push(otel_proto_gauge_metric( + "kitedb.replication.primary.head_log_index", + "Current primary head log index.", + "1", + primary.head_log_index, + &[], + time_unix_nano, + )); + otel_metrics.push(otel_proto_gauge_metric( + "kitedb.replication.primary.retained_floor", + "Current primary retained floor log index.", + "1", + primary.retained_floor, + &[], + time_unix_nano, + )); + otel_metrics.push(otel_proto_gauge_metric( + "kitedb.replication.primary.replica_count", + "Replica progress reporters known by this primary.", + "1", + primary.replica_count, + &[], + time_unix_nano, + )); + otel_metrics.push(otel_proto_gauge_metric( + "kitedb.replication.primary.stale_epoch_replica_count", + "Replica reporters currently on stale epochs.", + "1", + primary.stale_epoch_replica_count, + &[], + time_unix_nano, + )); + otel_metrics.push(otel_proto_gauge_metric( + "kitedb.replication.primary.max_replica_lag", + "Maximum reported lag (log 
frames) across replicas.", + "1", + primary.max_replica_lag, + &[], + time_unix_nano, + )); + otel_metrics.push(otel_proto_sum_metric( + "kitedb.replication.primary.append_attempts", + "Total replication append attempts on the primary commit path.", + "1", + primary.append_attempts, + true, + &[], + time_unix_nano, + )); + otel_metrics.push(otel_proto_sum_metric( + "kitedb.replication.primary.append_failures", + "Total replication append failures on the primary commit path.", + "1", + primary.append_failures, + true, + &[], + time_unix_nano, + )); + otel_metrics.push(otel_proto_sum_metric( + "kitedb.replication.primary.append_successes", + "Total replication append successes on the primary commit path.", + "1", + primary.append_successes, + true, + &[], + time_unix_nano, + )); + } + + if let Some(replica) = metrics.replication.replica.as_ref() { + otel_metrics.push(otel_proto_gauge_metric( + "kitedb.replication.replica.applied_epoch", + "Replica applied epoch.", + "1", + replica.applied_epoch, + &[], + time_unix_nano, + )); + otel_metrics.push(otel_proto_gauge_metric( + "kitedb.replication.replica.applied_log_index", + "Replica applied log index.", + "1", + replica.applied_log_index, + &[], + time_unix_nano, + )); + otel_metrics.push(otel_proto_gauge_metric( + "kitedb.replication.replica.needs_reseed", + "Whether replica currently requires snapshot reseed (1 yes, 0 no).", + "1", + if replica.needs_reseed { 1 } else { 0 }, + &[], + time_unix_nano, + )); + otel_metrics.push(otel_proto_gauge_metric( + "kitedb.replication.replica.last_error_present", + "Whether replica currently has a non-empty last_error value (1 yes, 0 no).", + "1", + if replica.last_error.is_some() { 1 } else { 0 }, + &[], + time_unix_nano, + )); + } + + let request = OtelExportMetricsServiceRequest { + resource_metrics: vec![OtelResourceMetrics { + resource: Some(OtelResource { + attributes: vec![ + otel_proto_attr_string("service.name", "kitedb"), + otel_proto_attr_string("kitedb.database.path", 
metrics.path.as_str()), + otel_proto_attr_string("kitedb.metrics.scope", "replication"), + ], + dropped_attributes_count: 0, + entity_refs: Vec::new(), + }), + scope_metrics: vec![OtelScopeMetrics { + scope: Some(OtelInstrumentationScope { + name: "kitedb.metrics.replication".to_string(), + version: env!("CARGO_PKG_VERSION").to_string(), + attributes: Vec::new(), + dropped_attributes_count: 0, + }), + metrics: otel_metrics, + schema_url: String::new(), + }], + schema_url: String::new(), + }], + }; + request.encode_to_vec() +} + pub fn health_check_single_file(db: &SingleFileDB) -> HealthCheckResult { let mut checks = Vec::new(); @@ -214,6 +2354,77 @@ pub fn health_check_single_file(db: &SingleFileDB) -> HealthCheckResult { HealthCheckResult { healthy, checks } } +fn build_replication_metrics( + primary: Option, + replica: Option, +) -> ReplicationMetrics { + let role = if primary.is_some() { + "primary" + } else if replica.is_some() { + "replica" + } else { + "disabled" + }; + + ReplicationMetrics { + enabled: role != "disabled", + role: role.to_string(), + primary: primary.map(build_primary_replication_metrics), + replica: replica.map(build_replica_replication_metrics), + } +} + +fn build_primary_replication_metrics( + status: PrimaryReplicationStatus, +) -> PrimaryReplicationMetrics { + let mut max_replica_lag = 0u64; + let mut min_replica_applied_log_index: Option = None; + let mut stale_epoch_replica_count = 0u64; + + for lag in &status.replica_lags { + if lag.epoch != status.epoch { + stale_epoch_replica_count = stale_epoch_replica_count.saturating_add(1); + } + + if lag.epoch == status.epoch { + let lag_value = status.head_log_index.saturating_sub(lag.applied_log_index); + max_replica_lag = max_replica_lag.max(lag_value); + min_replica_applied_log_index = Some(match min_replica_applied_log_index { + Some(current) => current.min(lag.applied_log_index), + None => lag.applied_log_index, + }); + } else if lag.epoch < status.epoch { + max_replica_lag = 
max_replica_lag.max(status.head_log_index); + } + } + + PrimaryReplicationMetrics { + epoch: status.epoch as i64, + head_log_index: status.head_log_index as i64, + retained_floor: status.retained_floor as i64, + replica_count: status.replica_lags.len() as i64, + stale_epoch_replica_count: stale_epoch_replica_count as i64, + max_replica_lag: max_replica_lag as i64, + min_replica_applied_log_index: min_replica_applied_log_index.map(|value| value as i64), + sidecar_path: status.sidecar_path.to_string_lossy().to_string(), + last_token: status.last_token.map(|token| token.to_string()), + append_attempts: status.append_attempts as i64, + append_failures: status.append_failures as i64, + append_successes: status.append_successes as i64, + } +} + +fn build_replica_replication_metrics( + status: ReplicaReplicationStatus, +) -> ReplicaReplicationMetrics { + ReplicaReplicationMetrics { + applied_epoch: status.applied_epoch as i64, + applied_log_index: status.applied_log_index as i64, + needs_reseed: status.needs_reseed, + last_error: status.last_error, + } +} + fn calc_hit_rate(hits: u64, misses: u64) -> f64 { let total = hits + misses; if total > 0 { @@ -338,3 +2549,187 @@ fn system_time_to_millis(time: SystemTime) -> i64 { .unwrap_or_default() .as_millis() as i64 } + +fn escape_prometheus_label_value(value: &str) -> String { + value + .replace('\\', "\\\\") + .replace('"', "\\\"") + .replace('\n', "\\n") +} + +fn format_prometheus_labels(labels: &[(&str, &str)]) -> String { + if labels.is_empty() { + return String::new(); + } + + let rendered = labels + .iter() + .map(|(key, value)| format!("{key}=\"{}\"", escape_prometheus_label_value(value))) + .collect::>() + .join(","); + format!("{{{rendered}}}") +} + +fn push_prometheus_help(lines: &mut Vec, metric: &str, metric_type: &str, help: &str) { + lines.push(format!("# HELP {metric} {help}")); + lines.push(format!("# TYPE {metric} {metric_type}")); +} + +fn push_prometheus_sample( + lines: &mut Vec, + metric: &str, + value: 
i64, + labels: &[(&str, &str)], +) { + lines.push(format!( + "{metric}{} {value}", + format_prometheus_labels(labels) + )); +} + +fn metric_time_unix_nano(metrics: &DatabaseMetrics) -> String { + metric_time_unix_nano_u64(metrics).to_string() +} + +fn metric_time_unix_nano_u64(metrics: &DatabaseMetrics) -> u64 { + let millis = metrics.collected_at_ms.max(0) as u64; + millis.saturating_mul(1_000_000) +} + +fn otel_attr_string(key: &str, value: &str) -> Value { + json!({ + "key": key, + "value": { "stringValue": value } + }) +} + +fn otel_attributes(labels: &[(&str, &str)]) -> Vec { + labels + .iter() + .map(|(key, value)| otel_attr_string(key, value)) + .collect() +} + +fn otel_gauge_metric( + name: &str, + description: &str, + unit: &str, + value: i64, + labels: &[(&str, &str)], + time_unix_nano: &str, +) -> Value { + json!({ + "name": name, + "description": description, + "unit": unit, + "gauge": { + "dataPoints": [ + { + "attributes": otel_attributes(labels), + "asInt": value, + "timeUnixNano": time_unix_nano, + } + ] + } + }) +} + +fn otel_sum_metric( + name: &str, + description: &str, + unit: &str, + value: i64, + is_monotonic: bool, + labels: &[(&str, &str)], + time_unix_nano: &str, +) -> Value { + json!({ + "name": name, + "description": description, + "unit": unit, + "sum": { + // CUMULATIVE + "aggregationTemporality": 2, + "isMonotonic": is_monotonic, + "dataPoints": [ + { + "attributes": otel_attributes(labels), + "asInt": value, + "timeUnixNano": time_unix_nano, + } + ] + } + }) +} + +fn otel_proto_attr_string(key: &str, value: &str) -> OtelKeyValue { + OtelKeyValue { + key: key.to_string(), + value: Some(OtelAnyValue { + value: Some(otel_any_value::Value::StringValue(value.to_string())), + }), + } +} + +fn otel_proto_attributes(labels: &[(&str, &str)]) -> Vec { + labels + .iter() + .map(|(key, value)| otel_proto_attr_string(key, value)) + .collect() +} + +fn otel_proto_number_data_point( + value: i64, + labels: &[(&str, &str)], + time_unix_nano: u64, +) 
-> OtelNumberDataPoint { + OtelNumberDataPoint { + attributes: otel_proto_attributes(labels), + start_time_unix_nano: 0, + time_unix_nano, + exemplars: Vec::new(), + flags: 0, + value: Some(otel_number_data_point::Value::AsInt(value)), + } +} + +fn otel_proto_gauge_metric( + name: &str, + description: &str, + unit: &str, + value: i64, + labels: &[(&str, &str)], + time_unix_nano: u64, +) -> OtelMetric { + OtelMetric { + name: name.to_string(), + description: description.to_string(), + unit: unit.to_string(), + metadata: Vec::new(), + data: Some(otel_metric::Data::Gauge(OtelGauge { + data_points: vec![otel_proto_number_data_point(value, labels, time_unix_nano)], + })), + } +} + +fn otel_proto_sum_metric( + name: &str, + description: &str, + unit: &str, + value: i64, + is_monotonic: bool, + labels: &[(&str, &str)], + time_unix_nano: u64, +) -> OtelMetric { + OtelMetric { + name: name.to_string(), + description: description.to_string(), + unit: unit.to_string(), + metadata: Vec::new(), + data: Some(otel_metric::Data::Sum(OtelSum { + data_points: vec![otel_proto_number_data_point(value, labels, time_unix_nano)], + aggregation_temporality: OtelAggregationTemporality::Cumulative as i32, + is_monotonic, + })), + } +} diff --git a/ray-rs/src/napi_bindings/database.rs b/ray-rs/src/napi_bindings/database.rs index f8b3596..a255ace 100644 --- a/ray-rs/src/napi_bindings/database.rs +++ b/ray-rs/src/napi_bindings/database.rs @@ -5,19 +5,22 @@ use napi::bindgen_prelude::*; use napi_derive::napi; use std::path::PathBuf; +use std::str::FromStr; use super::traversal::{ JsPathConfig, JsPathResult, JsTraversalDirection, JsTraversalResult, JsTraversalStep, JsTraverseOptions, }; +use crate::api::kite::KiteRuntimeProfile as RustKiteRuntimeProfile; use crate::api::pathfinding::{bfs, dijkstra, yen_k_shortest, PathConfig}; use crate::api::traversal::{ TraversalBuilder as RustTraversalBuilder, TraversalDirection, TraverseOptions, }; use crate::backup as core_backup; use 
crate::core::single_file::{ - close_single_file, is_single_file_path, open_single_file, single_file_extension, - ResizeWalOptions as RustResizeWalOptions, SingleFileDB as RustSingleFileDB, + close_single_file, close_single_file_with_options, is_single_file_path, open_single_file, + single_file_extension, ResizeWalOptions as RustResizeWalOptions, + SingleFileCloseOptions as RustSingleFileCloseOptions, SingleFileDB as RustSingleFileDB, SingleFileOpenOptions as RustOpenOptions, SingleFileOptimizeOptions as RustSingleFileOptimizeOptions, SnapshotParseMode as RustSnapshotParseMode, SyncMode as RustSyncMode, @@ -25,6 +28,11 @@ use crate::core::single_file::{ }; use crate::export as ray_export; use crate::metrics as core_metrics; +use crate::replication::primary::{ + PrimaryReplicationStatus, PrimaryRetentionOutcome, ReplicaLagStatus, +}; +use crate::replication::replica::ReplicaReplicationStatus; +use crate::replication::types::{CommitToken, ReplicationRole as RustReplicationRole}; use crate::streaming; use crate::types::{ CheckResult as RustCheckResult, ETypeId, Edge, EdgeWithProps as CoreEdgeWithProps, NodeId, @@ -83,6 +91,25 @@ impl From for RustSnapshotParseMode { } } +/// Replication role for single-file open options +#[napi(string_enum)] +#[derive(Debug)] +pub enum JsReplicationRole { + Disabled, + Primary, + Replica, +} + +impl From for RustReplicationRole { + fn from(role: JsReplicationRole) -> Self { + match role { + JsReplicationRole::Disabled => RustReplicationRole::Disabled, + JsReplicationRole::Primary => RustReplicationRole::Primary, + JsReplicationRole::Replica => RustReplicationRole::Replica, + } + } +} + // ============================================================================ // Open Options // ============================================================================ @@ -135,6 +162,20 @@ pub struct OpenOptions { pub group_commit_window_ms: Option, /// Snapshot parse mode: "Strict" or "Salvage" (single-file only) pub snapshot_parse_mode: Option, 
+ /// Replication role: "Disabled", "Primary", or "Replica" + pub replication_role: Option, + /// Replication sidecar path override + pub replication_sidecar_path: Option, + /// Source primary db path (replica role only) + pub replication_source_db_path: Option, + /// Source primary sidecar path (replica role only) + pub replication_source_sidecar_path: Option, + /// Segment rotation threshold in bytes (primary role only) + pub replication_segment_max_bytes: Option, + /// Minimum retained entries window (primary role only) + pub replication_retention_min_entries: Option, + /// Minimum retained segment age in milliseconds (primary role only) + pub replication_retention_min_ms: Option, } impl From for RustOpenOptions { @@ -221,11 +262,119 @@ impl From for RustOpenOptions { if let Some(mode) = opts.snapshot_parse_mode { rust_opts = rust_opts.snapshot_parse_mode(mode.into()); } + if let Some(role) = opts.replication_role { + rust_opts = rust_opts.replication_role(role.into()); + } + if let Some(path) = opts.replication_sidecar_path { + rust_opts = rust_opts.replication_sidecar_path(path); + } + if let Some(path) = opts.replication_source_db_path { + rust_opts = rust_opts.replication_source_db_path(path); + } + if let Some(path) = opts.replication_source_sidecar_path { + rust_opts = rust_opts.replication_source_sidecar_path(path); + } + if let Some(value) = opts.replication_segment_max_bytes { + if value >= 0 { + rust_opts = rust_opts.replication_segment_max_bytes(value as u64); + } + } + if let Some(value) = opts.replication_retention_min_entries { + if value >= 0 { + rust_opts = rust_opts.replication_retention_min_entries(value as u64); + } + } + if let Some(value) = opts.replication_retention_min_ms { + if value >= 0 { + rust_opts = rust_opts.replication_retention_min_ms(value as u64); + } + } rust_opts } } +fn js_sync_mode_from_rust(mode: RustSyncMode) -> JsSyncMode { + match mode { + RustSyncMode::Full => JsSyncMode::Full, + RustSyncMode::Normal => 
JsSyncMode::Normal, + RustSyncMode::Off => JsSyncMode::Off, + } +} + +fn js_replication_role_from_rust(role: RustReplicationRole) -> JsReplicationRole { + match role { + RustReplicationRole::Disabled => JsReplicationRole::Disabled, + RustReplicationRole::Primary => JsReplicationRole::Primary, + RustReplicationRole::Replica => JsReplicationRole::Replica, + } +} + +fn open_options_from_kite_profile_options(opts: crate::api::kite::KiteOptions) -> OpenOptions { + OpenOptions { + read_only: Some(opts.read_only), + create_if_missing: Some(opts.create_if_missing), + mvcc: Some(opts.mvcc), + mvcc_gc_interval_ms: opts.mvcc_gc_interval_ms.and_then(|v| i64::try_from(v).ok()), + mvcc_retention_ms: opts.mvcc_retention_ms.and_then(|v| i64::try_from(v).ok()), + mvcc_max_chain_depth: opts + .mvcc_max_chain_depth + .and_then(|v| i64::try_from(v).ok()), + page_size: None, + wal_size: opts.wal_size.and_then(|v| u32::try_from(v).ok()), + auto_checkpoint: None, + checkpoint_threshold: opts.checkpoint_threshold, + background_checkpoint: None, + checkpoint_compression: None, + cache_enabled: None, + cache_max_node_props: None, + cache_max_edge_props: None, + cache_max_traversal_entries: None, + cache_max_query_entries: None, + cache_query_ttl_ms: None, + sync_mode: Some(js_sync_mode_from_rust(opts.sync_mode)), + group_commit_enabled: Some(opts.group_commit_enabled), + group_commit_window_ms: i64::try_from(opts.group_commit_window_ms).ok(), + snapshot_parse_mode: None, + replication_role: Some(js_replication_role_from_rust(opts.replication_role)), + replication_sidecar_path: opts + .replication_sidecar_path + .map(|p| p.to_string_lossy().to_string()), + replication_source_db_path: opts + .replication_source_db_path + .map(|p| p.to_string_lossy().to_string()), + replication_source_sidecar_path: opts + .replication_source_sidecar_path + .map(|p| p.to_string_lossy().to_string()), + replication_segment_max_bytes: opts + .replication_segment_max_bytes + .and_then(|v| i64::try_from(v).ok()), + 
replication_retention_min_entries: opts + .replication_retention_min_entries + .and_then(|v| i64::try_from(v).ok()), + replication_retention_min_ms: opts + .replication_retention_min_ms + .and_then(|v| i64::try_from(v).ok()), + } +} + +/// Runtime profile preset for open/close behavior. +#[napi(object)] +#[derive(Debug, Default)] +pub struct RuntimeProfile { + /// Open-time options for `Database.open(path, options)`. + pub open_options: OpenOptions, + /// Optional close-time checkpoint trigger threshold. + pub close_checkpoint_if_wal_usage_at_least: Option, +} + +fn runtime_profile_from_rust(profile: RustKiteRuntimeProfile) -> RuntimeProfile { + RuntimeProfile { + open_options: open_options_from_kite_profile_options(profile.options), + close_checkpoint_if_wal_usage_at_least: profile.close_checkpoint_if_wal_usage_at_least, + } +} + // ============================================================================ // Single-File Maintenance Options // ============================================================================ @@ -374,6 +523,102 @@ pub struct MvccStats { pub committed_writes_pruned: i64, } +/// Per-replica lag entry on primary status +#[napi(object)] +pub struct JsReplicaLagStatus { + pub replica_id: String, + pub epoch: i64, + pub applied_log_index: i64, +} + +/// Primary replication runtime status +#[napi(object)] +pub struct JsPrimaryReplicationStatus { + pub role: String, + pub epoch: i64, + pub head_log_index: i64, + pub retained_floor: i64, + pub replica_lags: Vec, + pub sidecar_path: String, + pub last_token: Option, + pub append_attempts: i64, + pub append_failures: i64, + pub append_successes: i64, +} + +/// Replica replication runtime status +#[napi(object)] +pub struct JsReplicaReplicationStatus { + pub role: String, + pub source_db_path: Option, + pub source_sidecar_path: Option, + pub applied_epoch: i64, + pub applied_log_index: i64, + pub last_error: Option, + pub needs_reseed: bool, +} + +/// Retention run outcome +#[napi(object)] +pub 
struct JsPrimaryRetentionOutcome { + pub pruned_segments: i64, + pub retained_floor: i64, +} + +impl From for JsReplicaLagStatus { + fn from(value: ReplicaLagStatus) -> Self { + Self { + replica_id: value.replica_id, + epoch: value.epoch as i64, + applied_log_index: value.applied_log_index as i64, + } + } +} + +impl From for JsPrimaryReplicationStatus { + fn from(value: PrimaryReplicationStatus) -> Self { + Self { + role: value.role.to_string(), + epoch: value.epoch as i64, + head_log_index: value.head_log_index as i64, + retained_floor: value.retained_floor as i64, + replica_lags: value.replica_lags.into_iter().map(Into::into).collect(), + sidecar_path: value.sidecar_path.to_string_lossy().to_string(), + last_token: value.last_token.map(|token| token.to_string()), + append_attempts: value.append_attempts as i64, + append_failures: value.append_failures as i64, + append_successes: value.append_successes as i64, + } + } +} + +impl From for JsReplicaReplicationStatus { + fn from(value: ReplicaReplicationStatus) -> Self { + Self { + role: value.role.to_string(), + source_db_path: value + .source_db_path + .map(|path| path.to_string_lossy().to_string()), + source_sidecar_path: value + .source_sidecar_path + .map(|path| path.to_string_lossy().to_string()), + applied_epoch: value.applied_epoch as i64, + applied_log_index: value.applied_log_index as i64, + last_error: value.last_error, + needs_reseed: value.needs_reseed, + } + } +} + +impl From for JsPrimaryRetentionOutcome { + fn from(value: PrimaryRetentionOutcome) -> Self { + Self { + pruned_segments: value.pruned_segments as i64, + retained_floor: value.retained_floor as i64, + } + } +} + /// Options for export #[napi(object)] pub struct ExportOptions { @@ -601,6 +846,41 @@ pub struct MvccMetrics { pub committed_writes_pruned: i64, } +/// Primary replication metrics +#[napi(object)] +pub struct PrimaryReplicationMetrics { + pub epoch: i64, + pub head_log_index: i64, + pub retained_floor: i64, + pub replica_count: i64, 
+ pub stale_epoch_replica_count: i64, + pub max_replica_lag: i64, + pub min_replica_applied_log_index: Option, + pub sidecar_path: String, + pub last_token: Option, + pub append_attempts: i64, + pub append_failures: i64, + pub append_successes: i64, +} + +/// Replica replication metrics +#[napi(object)] +pub struct ReplicaReplicationMetrics { + pub applied_epoch: i64, + pub applied_log_index: i64, + pub needs_reseed: bool, + pub last_error: Option, +} + +/// Replication metrics +#[napi(object)] +pub struct ReplicationMetrics { + pub enabled: bool, + pub role: String, + pub primary: Option, + pub replica: Option, +} + /// Memory metrics #[napi(object)] pub struct MemoryMetrics { @@ -619,6 +899,7 @@ pub struct DatabaseMetrics { pub data: DataMetrics, pub cache: CacheMetrics, pub mvcc: Option, + pub replication: ReplicationMetrics, pub memory: MemoryMetrics, /// Timestamp in milliseconds since epoch pub collected_at: i64, @@ -639,6 +920,47 @@ pub struct HealthCheckResult { pub checks: Vec, } +/// OTLP HTTP metrics push result. +#[napi(object)] +pub struct OtlpHttpExportResult { + pub status_code: i64, + pub response_body: String, +} + +/// OTLP collector push options (host runtime). 
+#[napi(object)] +#[derive(Default, Clone)] +pub struct PushReplicationMetricsOtelOptions { + pub timeout_ms: Option, + pub bearer_token: Option, + pub retry_max_attempts: Option, + pub retry_backoff_ms: Option, + pub retry_backoff_max_ms: Option, + pub retry_jitter_ratio: Option, + pub adaptive_retry: Option, + pub adaptive_retry_mode: Option, + pub adaptive_retry_ewma_alpha: Option, + pub circuit_breaker_failure_threshold: Option, + pub circuit_breaker_open_ms: Option, + pub circuit_breaker_half_open_probes: Option, + pub circuit_breaker_state_path: Option, + pub circuit_breaker_state_url: Option, + pub circuit_breaker_state_patch: Option, + pub circuit_breaker_state_patch_batch: Option, + pub circuit_breaker_state_patch_batch_max_keys: Option, + pub circuit_breaker_state_patch_merge: Option, + pub circuit_breaker_state_patch_merge_max_keys: Option, + pub circuit_breaker_state_patch_retry_max_attempts: Option, + pub circuit_breaker_state_cas: Option, + pub circuit_breaker_state_lease_id: Option, + pub circuit_breaker_scope_key: Option, + pub compression_gzip: Option, + pub https_only: Option, + pub ca_cert_pem_path: Option, + pub client_cert_pem_path: Option, + pub client_key_pem_path: Option, +} + impl From for CacheLayerMetrics { fn from(metrics: core_metrics::CacheLayerMetrics) -> Self { CacheLayerMetrics { @@ -695,6 +1017,47 @@ impl From for MvccMetrics { } } +impl From for PrimaryReplicationMetrics { + fn from(metrics: core_metrics::PrimaryReplicationMetrics) -> Self { + PrimaryReplicationMetrics { + epoch: metrics.epoch, + head_log_index: metrics.head_log_index, + retained_floor: metrics.retained_floor, + replica_count: metrics.replica_count, + stale_epoch_replica_count: metrics.stale_epoch_replica_count, + max_replica_lag: metrics.max_replica_lag, + min_replica_applied_log_index: metrics.min_replica_applied_log_index, + sidecar_path: metrics.sidecar_path, + last_token: metrics.last_token, + append_attempts: metrics.append_attempts, + append_failures: 
metrics.append_failures, + append_successes: metrics.append_successes, + } + } +} + +impl From for ReplicaReplicationMetrics { + fn from(metrics: core_metrics::ReplicaReplicationMetrics) -> Self { + ReplicaReplicationMetrics { + applied_epoch: metrics.applied_epoch, + applied_log_index: metrics.applied_log_index, + needs_reseed: metrics.needs_reseed, + last_error: metrics.last_error, + } + } +} + +impl From for ReplicationMetrics { + fn from(metrics: core_metrics::ReplicationMetrics) -> Self { + ReplicationMetrics { + enabled: metrics.enabled, + role: metrics.role, + primary: metrics.primary.map(Into::into), + replica: metrics.replica.map(Into::into), + } + } +} + impl From for MemoryMetrics { fn from(metrics: core_metrics::MemoryMetrics) -> Self { MemoryMetrics { @@ -715,6 +1078,7 @@ impl From for DatabaseMetrics { data: metrics.data.into(), cache: metrics.cache.into(), mvcc: metrics.mvcc.map(Into::into), + replication: metrics.replication.into(), memory: metrics.memory.into(), collected_at: metrics.collected_at_ms, } @@ -740,6 +1104,15 @@ impl From for HealthCheckResult { } } +impl From for OtlpHttpExportResult { + fn from(result: core_metrics::OtlpHttpExportResult) -> Self { + OtlpHttpExportResult { + status_code: result.status_code, + response_body: result.response_body, + } + } +} + // ============================================================================ // Property Value (JS-compatible) // ============================================================================ @@ -945,6 +1318,21 @@ impl Database { Ok(()) } + /// Close the database and run a blocking checkpoint if WAL usage is above threshold. 
+ #[napi] + pub fn close_with_checkpoint_if_wal_over(&mut self, threshold: f64) -> Result<()> { + if let Some(db) = self.inner.take() { + match db { + DatabaseInner::SingleFile(db) => close_single_file_with_options( + db, + RustSingleFileCloseOptions::new().checkpoint_if_wal_usage_at_least(threshold), + ) + .map_err(|e| Error::from_reason(format!("Failed to close database: {e}")))?, + } + } + Ok(()) + } + /// Check if database is open #[napi(getter)] pub fn is_open(&self) -> bool { @@ -1013,6 +1401,18 @@ impl Database { } } + /// Commit the current transaction and return replication token when primary replication is enabled. + #[napi] + pub fn commit_with_token(&self) -> Result> { + match self.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => db + .commit_with_token() + .map(|token| token.map(|value| value.to_string())) + .map_err(|e| Error::from_reason(format!("Failed to commit with token: {e}"))), + None => Err(Error::from_reason("Database is closed")), + } + } + /// Rollback the current transaction #[napi] pub fn rollback(&self) -> Result<()> { @@ -1033,6 +1433,172 @@ impl Database { } } + /// Wait until the DB has observed at least the provided commit token. + #[napi] + pub fn wait_for_token(&self, token: String, timeout_ms: i64) -> Result { + if timeout_ms < 0 { + return Err(Error::from_reason("timeoutMs must be non-negative")); + } + let token = CommitToken::from_str(&token) + .map_err(|e| Error::from_reason(format!("Invalid commit token: {e}")))?; + + match self.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => db + .wait_for_token(token, timeout_ms as u64) + .map_err(|e| Error::from_reason(format!("Failed waiting for token: {e}"))), + None => Err(Error::from_reason("Database is closed")), + } + } + + // ======================================================================== + // Replication Methods + // ======================================================================== + + /// Primary replication status when role=primary, else null. 
+ #[napi] + pub fn primary_replication_status(&self) -> Result> { + match self.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => Ok(db.primary_replication_status().map(Into::into)), + None => Err(Error::from_reason("Database is closed")), + } + } + + /// Replica replication status when role=replica, else null. + #[napi] + pub fn replica_replication_status(&self) -> Result> { + match self.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => Ok(db.replica_replication_status().map(Into::into)), + None => Err(Error::from_reason("Database is closed")), + } + } + + /// Promote this primary to the next replication epoch. + #[napi] + pub fn primary_promote_to_next_epoch(&self) -> Result { + match self.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => db + .primary_promote_to_next_epoch() + .map(|epoch| epoch as i64) + .map_err(|e| Error::from_reason(format!("Failed to promote primary: {e}"))), + None => Err(Error::from_reason("Database is closed")), + } + } + + /// Report replica applied cursor to primary for retention decisions. + #[napi] + pub fn primary_report_replica_progress( + &self, + replica_id: String, + epoch: i64, + applied_log_index: i64, + ) -> Result<()> { + if epoch < 0 || applied_log_index < 0 { + return Err(Error::from_reason( + "epoch and appliedLogIndex must be non-negative", + )); + } + match self.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => db + .primary_report_replica_progress(&replica_id, epoch as u64, applied_log_index as u64) + .map_err(|e| Error::from_reason(format!("Failed to report replica progress: {e}"))), + None => Err(Error::from_reason("Database is closed")), + } + } + + /// Execute replication retention on primary. 
+ #[napi] + pub fn primary_run_retention(&self) -> Result { + match self.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => db + .primary_run_retention() + .map(Into::into) + .map_err(|e| Error::from_reason(format!("Failed to run retention: {e}"))), + None => Err(Error::from_reason("Database is closed")), + } + } + + /// Export latest primary snapshot metadata and optional bytes as transport JSON. + #[napi] + pub fn export_replication_snapshot_transport_json( + &self, + include_data: Option, + ) -> Result { + match self.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => db + .primary_export_snapshot_transport_json(include_data.unwrap_or(false)) + .map_err(|e| Error::from_reason(format!("Failed to export replication snapshot: {e}"))), + None => Err(Error::from_reason("Database is closed")), + } + } + + /// Export primary replication log page (cursor + limits) as transport JSON. + #[napi] + pub fn export_replication_log_transport_json( + &self, + cursor: Option, + max_frames: Option, + max_bytes: Option, + include_payload: Option, + ) -> Result { + let max_frames = max_frames.unwrap_or(128); + let max_bytes = max_bytes.unwrap_or(1_048_576); + if max_frames <= 0 { + return Err(Error::from_reason("maxFrames must be positive")); + } + if max_bytes <= 0 { + return Err(Error::from_reason("maxBytes must be positive")); + } + + match self.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => db + .primary_export_log_transport_json( + cursor.as_deref(), + max_frames as usize, + max_bytes as usize, + include_payload.unwrap_or(true), + ) + .map_err(|e| Error::from_reason(format!("Failed to export replication log: {e}"))), + None => Err(Error::from_reason("Database is closed")), + } + } + + /// Bootstrap a replica from the primary snapshot. 
+ #[napi] + pub fn replica_bootstrap_from_snapshot(&self) -> Result<()> { + match self.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => db + .replica_bootstrap_from_snapshot() + .map_err(|e| Error::from_reason(format!("Failed to bootstrap replica: {e}"))), + None => Err(Error::from_reason("Database is closed")), + } + } + + /// Pull and apply up to maxFrames replication frames on replica. + #[napi] + pub fn replica_catch_up_once(&self, max_frames: i64) -> Result { + if max_frames < 0 { + return Err(Error::from_reason("maxFrames must be non-negative")); + } + match self.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => db + .replica_catch_up_once(max_frames as usize) + .map(|count| count as i64) + .map_err(|e| Error::from_reason(format!("Failed replica catch-up: {e}"))), + None => Err(Error::from_reason("Database is closed")), + } + } + + /// Force a replica reseed from current primary snapshot. + #[napi] + pub fn replica_reseed_from_snapshot(&self) -> Result<()> { + match self.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => db + .replica_reseed_from_snapshot() + .map_err(|e| Error::from_reason(format!("Failed to reseed replica: {e}"))), + None => Err(Error::from_reason("Database is closed")), + } + } + // ======================================================================== // Node Operations // ======================================================================== @@ -2879,6 +3445,24 @@ pub fn open_database(path: String, options: Option) -> Result RuntimeProfile { + runtime_profile_from_rust(RustKiteRuntimeProfile::safe()) +} + +/// Recommended balanced profile (good throughput + durability tradeoff). +#[napi] +pub fn recommended_balanced_profile() -> RuntimeProfile { + runtime_profile_from_rust(RustKiteRuntimeProfile::balanced()) +} + +/// Recommended profile for reopen-heavy workloads. 
+#[napi] +pub fn recommended_reopen_heavy_profile() -> RuntimeProfile { + runtime_profile_from_rust(RustKiteRuntimeProfile::reopen_heavy()) +} + // ============================================================================ // Metrics / Health // ============================================================================ @@ -2891,6 +3475,441 @@ pub fn collect_metrics(db: &Database) -> Result { } } +#[napi] +pub fn collect_replication_metrics_prometheus(db: &Database) -> Result { + match db.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => { + Ok(core_metrics::collect_replication_metrics_prometheus_single_file(db)) + } + None => Err(Error::from_reason("Database is closed")), + } +} + +#[napi] +pub fn collect_replication_metrics_otel_json(db: &Database) -> Result { + match db.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => { + Ok(core_metrics::collect_replication_metrics_otel_json_single_file(db)) + } + None => Err(Error::from_reason("Database is closed")), + } +} + +#[napi] +pub fn collect_replication_metrics_otel_protobuf(db: &Database) -> Result { + match db.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => { + Ok(core_metrics::collect_replication_metrics_otel_protobuf_single_file(db).into()) + } + None => Err(Error::from_reason("Database is closed")), + } +} + +#[napi] +pub fn collect_replication_snapshot_transport_json( + db: &Database, + include_data: Option, +) -> Result { + match db.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => db + .primary_export_snapshot_transport_json(include_data.unwrap_or(false)) + .map_err(|e| Error::from_reason(format!("Failed to export replication snapshot: {e}"))), + None => Err(Error::from_reason("Database is closed")), + } +} + +#[napi] +pub fn collect_replication_log_transport_json( + db: &Database, + cursor: Option, + max_frames: Option, + max_bytes: Option, + include_payload: Option, +) -> Result { + let max_frames = max_frames.unwrap_or(128); + let max_bytes = 
max_bytes.unwrap_or(1_048_576); + if max_frames <= 0 { + return Err(Error::from_reason("maxFrames must be positive")); + } + if max_bytes <= 0 { + return Err(Error::from_reason("maxBytes must be positive")); + } + + match db.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => db + .primary_export_log_transport_json( + cursor.as_deref(), + max_frames as usize, + max_bytes as usize, + include_payload.unwrap_or(true), + ) + .map_err(|e| Error::from_reason(format!("Failed to export replication log: {e}"))), + None => Err(Error::from_reason("Database is closed")), + } +} + +#[napi] +pub fn push_replication_metrics_otel_json( + db: &Database, + endpoint: String, + timeout_ms: i64, + bearer_token: Option, +) -> Result { + if timeout_ms <= 0 { + return Err(Error::from_reason("timeoutMs must be positive")); + } + + match db.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => { + core_metrics::push_replication_metrics_otel_json_single_file( + db, + &endpoint, + timeout_ms as u64, + bearer_token.as_deref(), + ) + .map(Into::into) + .map_err(|e| Error::from_reason(format!("Failed to push replication metrics: {e}"))) + } + None => Err(Error::from_reason("Database is closed")), + } +} + +fn build_core_otel_push_options( + options: PushReplicationMetricsOtelOptions, +) -> Result { + let timeout_ms = options.timeout_ms.unwrap_or(5_000); + if timeout_ms <= 0 { + return Err(Error::from_reason("timeoutMs must be positive")); + } + let retry_max_attempts = options.retry_max_attempts.unwrap_or(1); + if retry_max_attempts <= 0 { + return Err(Error::from_reason("retryMaxAttempts must be positive")); + } + let retry_backoff_ms = options.retry_backoff_ms.unwrap_or(100); + if retry_backoff_ms < 0 { + return Err(Error::from_reason("retryBackoffMs must be non-negative")); + } + let retry_backoff_max_ms = options.retry_backoff_max_ms.unwrap_or(2_000); + if retry_backoff_max_ms < 0 { + return Err(Error::from_reason("retryBackoffMaxMs must be non-negative")); + } + if 
retry_backoff_max_ms > 0 && retry_backoff_max_ms < retry_backoff_ms { + return Err(Error::from_reason( + "retryBackoffMaxMs must be >= retryBackoffMs when non-zero", + )); + } + let retry_jitter_ratio = options.retry_jitter_ratio.unwrap_or(0.0); + if !(0.0..=1.0).contains(&retry_jitter_ratio) { + return Err(Error::from_reason( + "retryJitterRatio must be within [0.0, 1.0]", + )); + } + let adaptive_retry_mode = match options + .adaptive_retry_mode + .as_deref() + .map(str::trim) + .filter(|value| !value.is_empty()) + { + None => core_metrics::OtlpAdaptiveRetryMode::Linear, + Some(value) if value.eq_ignore_ascii_case("linear") => { + core_metrics::OtlpAdaptiveRetryMode::Linear + } + Some(value) if value.eq_ignore_ascii_case("ewma") => core_metrics::OtlpAdaptiveRetryMode::Ewma, + Some(_) => { + return Err(Error::from_reason( + "adaptiveRetryMode must be one of: linear, ewma", + )); + } + }; + let adaptive_retry_ewma_alpha = options.adaptive_retry_ewma_alpha.unwrap_or(0.3); + if !(0.0..=1.0).contains(&adaptive_retry_ewma_alpha) { + return Err(Error::from_reason( + "adaptiveRetryEwmaAlpha must be within [0.0, 1.0]", + )); + } + let circuit_breaker_failure_threshold = options.circuit_breaker_failure_threshold.unwrap_or(0); + if circuit_breaker_failure_threshold < 0 { + return Err(Error::from_reason( + "circuitBreakerFailureThreshold must be non-negative", + )); + } + let circuit_breaker_open_ms = options.circuit_breaker_open_ms.unwrap_or(0); + if circuit_breaker_open_ms < 0 { + return Err(Error::from_reason( + "circuitBreakerOpenMs must be non-negative", + )); + } + if circuit_breaker_failure_threshold > 0 && circuit_breaker_open_ms == 0 { + return Err(Error::from_reason( + "circuitBreakerOpenMs must be positive when circuitBreakerFailureThreshold is set", + )); + } + let circuit_breaker_half_open_probes = options.circuit_breaker_half_open_probes.unwrap_or(1); + if circuit_breaker_half_open_probes < 0 { + return Err(Error::from_reason( + "circuitBreakerHalfOpenProbes 
must be non-negative", + )); + } + if circuit_breaker_failure_threshold > 0 && circuit_breaker_half_open_probes == 0 { + return Err(Error::from_reason( + "circuitBreakerHalfOpenProbes must be positive when circuitBreakerFailureThreshold is set", + )); + } + if let Some(path) = options.circuit_breaker_state_path.as_deref() { + if path.trim().is_empty() { + return Err(Error::from_reason( + "circuitBreakerStatePath must not be empty when provided", + )); + } + } + if let Some(url) = options.circuit_breaker_state_url.as_deref() { + let trimmed = url.trim(); + if trimmed.is_empty() { + return Err(Error::from_reason( + "circuitBreakerStateUrl must not be empty when provided", + )); + } + if !(trimmed.starts_with("http://") || trimmed.starts_with("https://")) { + return Err(Error::from_reason( + "circuitBreakerStateUrl must use http:// or https://", + )); + } + if options.https_only.unwrap_or(false) && trimmed.starts_with("http://") { + return Err(Error::from_reason( + "circuitBreakerStateUrl must use https when httpsOnly is enabled", + )); + } + } + if options.circuit_breaker_state_path.is_some() && options.circuit_breaker_state_url.is_some() { + return Err(Error::from_reason( + "circuitBreakerStatePath and circuitBreakerStateUrl are mutually exclusive", + )); + } + if options.circuit_breaker_state_patch.unwrap_or(false) + && options.circuit_breaker_state_url.is_none() + { + return Err(Error::from_reason( + "circuitBreakerStatePatch requires circuitBreakerStateUrl", + )); + } + if options.circuit_breaker_state_patch_batch.unwrap_or(false) + && !options.circuit_breaker_state_patch.unwrap_or(false) + { + return Err(Error::from_reason( + "circuitBreakerStatePatchBatch requires circuitBreakerStatePatch", + )); + } + if options.circuit_breaker_state_patch_merge.unwrap_or(false) + && !options.circuit_breaker_state_patch.unwrap_or(false) + { + return Err(Error::from_reason( + "circuitBreakerStatePatchMerge requires circuitBreakerStatePatch", + )); + } + let 
circuit_breaker_state_patch_batch_max_keys = options + .circuit_breaker_state_patch_batch_max_keys + .unwrap_or(8); + if circuit_breaker_state_patch_batch_max_keys <= 0 { + return Err(Error::from_reason( + "circuitBreakerStatePatchBatchMaxKeys must be positive", + )); + } + let circuit_breaker_state_patch_merge_max_keys = options + .circuit_breaker_state_patch_merge_max_keys + .unwrap_or(32); + if circuit_breaker_state_patch_merge_max_keys <= 0 { + return Err(Error::from_reason( + "circuitBreakerStatePatchMergeMaxKeys must be positive", + )); + } + let circuit_breaker_state_patch_retry_max_attempts = options + .circuit_breaker_state_patch_retry_max_attempts + .unwrap_or(1); + if circuit_breaker_state_patch_retry_max_attempts <= 0 { + return Err(Error::from_reason( + "circuitBreakerStatePatchRetryMaxAttempts must be positive", + )); + } + if options.circuit_breaker_state_cas.unwrap_or(false) + && options.circuit_breaker_state_url.is_none() + { + return Err(Error::from_reason( + "circuitBreakerStateCas requires circuitBreakerStateUrl", + )); + } + if let Some(lease_id) = options.circuit_breaker_state_lease_id.as_deref() { + if lease_id.trim().is_empty() { + return Err(Error::from_reason( + "circuitBreakerStateLeaseId must not be empty when provided", + )); + } + if options.circuit_breaker_state_url.is_none() { + return Err(Error::from_reason( + "circuitBreakerStateLeaseId requires circuitBreakerStateUrl", + )); + } + } + if let Some(scope_key) = options.circuit_breaker_scope_key.as_deref() { + if scope_key.trim().is_empty() { + return Err(Error::from_reason( + "circuitBreakerScopeKey must not be empty when provided", + )); + } + } + + Ok(core_metrics::OtlpHttpPushOptions { + timeout_ms: timeout_ms as u64, + bearer_token: options.bearer_token, + retry_max_attempts: retry_max_attempts as u32, + retry_backoff_ms: retry_backoff_ms as u64, + retry_backoff_max_ms: retry_backoff_max_ms as u64, + retry_jitter_ratio, + adaptive_retry_mode, + adaptive_retry_ewma_alpha, + 
adaptive_retry: options.adaptive_retry.unwrap_or(false), + circuit_breaker_failure_threshold: circuit_breaker_failure_threshold as u32, + circuit_breaker_open_ms: circuit_breaker_open_ms as u64, + circuit_breaker_half_open_probes: circuit_breaker_half_open_probes as u32, + circuit_breaker_state_path: options.circuit_breaker_state_path, + circuit_breaker_state_url: options.circuit_breaker_state_url, + circuit_breaker_state_patch: options.circuit_breaker_state_patch.unwrap_or(false), + circuit_breaker_state_patch_batch: options.circuit_breaker_state_patch_batch.unwrap_or(false), + circuit_breaker_state_patch_batch_max_keys: circuit_breaker_state_patch_batch_max_keys as u32, + circuit_breaker_state_patch_merge: options.circuit_breaker_state_patch_merge.unwrap_or(false), + circuit_breaker_state_patch_merge_max_keys: circuit_breaker_state_patch_merge_max_keys as u32, + circuit_breaker_state_patch_retry_max_attempts: circuit_breaker_state_patch_retry_max_attempts + as u32, + circuit_breaker_state_cas: options.circuit_breaker_state_cas.unwrap_or(false), + circuit_breaker_state_lease_id: options.circuit_breaker_state_lease_id, + circuit_breaker_scope_key: options.circuit_breaker_scope_key, + compression_gzip: options.compression_gzip.unwrap_or(false), + tls: core_metrics::OtlpHttpTlsOptions { + https_only: options.https_only.unwrap_or(false), + ca_cert_pem_path: options.ca_cert_pem_path, + client_cert_pem_path: options.client_cert_pem_path, + client_key_pem_path: options.client_key_pem_path, + }, + }) +} + +#[napi] +pub fn push_replication_metrics_otel_json_with_options( + db: &Database, + endpoint: String, + options: Option, +) -> Result { + let core_options = build_core_otel_push_options(options.unwrap_or_default())?; + + match db.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => { + core_metrics::push_replication_metrics_otel_json_single_file_with_options( + db, + &endpoint, + &core_options, + ) + .map(Into::into) + .map_err(|e| 
Error::from_reason(format!("Failed to push replication metrics: {e}"))) + } + None => Err(Error::from_reason("Database is closed")), + } +} + +#[napi] +pub fn push_replication_metrics_otel_protobuf( + db: &Database, + endpoint: String, + timeout_ms: i64, + bearer_token: Option, +) -> Result { + if timeout_ms <= 0 { + return Err(Error::from_reason("timeoutMs must be positive")); + } + + match db.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => { + core_metrics::push_replication_metrics_otel_protobuf_single_file( + db, + &endpoint, + timeout_ms as u64, + bearer_token.as_deref(), + ) + .map(Into::into) + .map_err(|e| Error::from_reason(format!("Failed to push replication metrics: {e}"))) + } + None => Err(Error::from_reason("Database is closed")), + } +} + +#[napi] +pub fn push_replication_metrics_otel_protobuf_with_options( + db: &Database, + endpoint: String, + options: Option, +) -> Result { + let core_options = build_core_otel_push_options(options.unwrap_or_default())?; + + match db.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => { + core_metrics::push_replication_metrics_otel_protobuf_single_file_with_options( + db, + &endpoint, + &core_options, + ) + .map(Into::into) + .map_err(|e| Error::from_reason(format!("Failed to push replication metrics: {e}"))) + } + None => Err(Error::from_reason("Database is closed")), + } +} + +#[napi] +pub fn push_replication_metrics_otel_grpc( + db: &Database, + endpoint: String, + timeout_ms: i64, + bearer_token: Option, +) -> Result { + if timeout_ms <= 0 { + return Err(Error::from_reason("timeoutMs must be positive")); + } + + match db.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => { + core_metrics::push_replication_metrics_otel_grpc_single_file( + db, + &endpoint, + timeout_ms as u64, + bearer_token.as_deref(), + ) + .map(Into::into) + .map_err(|e| Error::from_reason(format!("Failed to push replication metrics: {e}"))) + } + None => Err(Error::from_reason("Database is closed")), + } +} + +#[napi] 
+pub fn push_replication_metrics_otel_grpc_with_options( + db: &Database, + endpoint: String, + options: Option, +) -> Result { + let core_options = build_core_otel_push_options(options.unwrap_or_default())?; + + match db.inner.as_ref() { + Some(DatabaseInner::SingleFile(db)) => { + core_metrics::push_replication_metrics_otel_grpc_single_file_with_options( + db, + &endpoint, + &core_options, + ) + .map(Into::into) + .map_err(|e| Error::from_reason(format!("Failed to push replication metrics: {e}"))) + } + None => Err(Error::from_reason("Database is closed")), + } +} + #[napi] pub fn health_check(db: &Database) -> Result { match db.inner.as_ref() { diff --git a/ray-rs/src/napi_bindings/kite/mod.rs b/ray-rs/src/napi_bindings/kite/mod.rs index 14f0ff8..5fd72c7 100644 --- a/ray-rs/src/napi_bindings/kite/mod.rs +++ b/ray-rs/src/napi_bindings/kite/mod.rs @@ -35,7 +35,9 @@ use std::sync::Arc; use crate::api::kite::{BatchOp, EdgeDef, Kite as RustKite, KiteOptions, NodeDef}; use crate::types::NodeId; -use super::database::{CheckResult, DbStats, MvccStats}; +use super::database::{ + CheckResult, DbStats, JsPrimaryReplicationStatus, JsReplicaReplicationStatus, MvccStats, +}; use super::database::{JsFullEdge, JsPropValue}; use conversion::{js_value_to_prop_value, key_suffix_from_js}; @@ -124,6 +126,14 @@ impl Kite { if let Some(mode) = options.sync_mode { kite_opts.sync_mode = mode.into(); } + if let Some(enabled) = options.group_commit_enabled { + kite_opts.group_commit_enabled = enabled; + } + if let Some(window_ms) = options.group_commit_window_ms { + if window_ms >= 0 { + kite_opts.group_commit_window_ms = window_ms as u64; + } + } if let Some(wal_size_mb) = options.wal_size_mb { if wal_size_mb > 0 { kite_opts.wal_size = Some((wal_size_mb as usize).saturating_mul(1024 * 1024)); @@ -132,6 +142,36 @@ impl Kite { if let Some(threshold) = options.checkpoint_threshold { kite_opts.checkpoint_threshold = Some(threshold.clamp(0.0, 1.0)); } + if let Some(threshold) = 
options.close_checkpoint_if_wal_usage_at_least { + kite_opts.close_checkpoint_if_wal_usage_at_least = Some(threshold.clamp(0.0, 1.0)); + } + if let Some(role) = options.replication_role { + kite_opts.replication_role = role.into(); + } + if let Some(path) = options.replication_sidecar_path { + kite_opts.replication_sidecar_path = Some(path.into()); + } + if let Some(path) = options.replication_source_db_path { + kite_opts.replication_source_db_path = Some(path.into()); + } + if let Some(path) = options.replication_source_sidecar_path { + kite_opts.replication_source_sidecar_path = Some(path.into()); + } + if let Some(value) = options.replication_segment_max_bytes { + if value >= 0 { + kite_opts.replication_segment_max_bytes = Some(value as u64); + } + } + if let Some(value) = options.replication_retention_min_entries { + if value >= 0 { + kite_opts.replication_retention_min_entries = Some(value as u64); + } + } + if let Some(value) = options.replication_retention_min_ms { + if value >= 0 { + kite_opts.replication_retention_min_ms = Some(value as u64); + } + } for node in options.nodes { let key_spec = Arc::new(parse_key_spec(&node.name, node.key)?); @@ -856,6 +896,56 @@ impl Kite { self.with_kite(|ray| Ok(ray.raw().has_transaction())) } + /// Primary replication status when role=primary, else null. + #[napi] + pub fn primary_replication_status(&self) -> Result> { + self.with_kite(|ray| Ok(ray.raw().primary_replication_status().map(Into::into))) + } + + /// Replica replication status when role=replica, else null. + #[napi] + pub fn replica_replication_status(&self) -> Result> { + self.with_kite(|ray| Ok(ray.raw().replica_replication_status().map(Into::into))) + } + + /// Pull and apply up to maxFrames replication frames on replica. 
+ #[napi] + pub fn replica_catch_up_once(&self, max_frames: i64) -> Result { + if max_frames < 0 { + return Err(Error::from_reason("maxFrames must be non-negative")); + } + self.with_kite_mut(|ray| { + ray + .raw() + .replica_catch_up_once(max_frames as usize) + .map(|count| count as i64) + .map_err(|e| Error::from_reason(format!("Failed replica catch-up: {e}"))) + }) + } + + /// Force a replica reseed from current primary snapshot. + #[napi] + pub fn replica_reseed_from_snapshot(&self) -> Result<()> { + self.with_kite_mut(|ray| { + ray + .raw() + .replica_reseed_from_snapshot() + .map_err(|e| Error::from_reason(format!("Failed to reseed replica: {e}"))) + }) + } + + /// Promote this primary to the next replication epoch. + #[napi] + pub fn primary_promote_to_next_epoch(&self) -> Result { + self.with_kite_mut(|ray| { + ray + .raw() + .primary_promote_to_next_epoch() + .map(|epoch| epoch as i64) + .map_err(|e| Error::from_reason(format!("Failed to promote primary: {e}"))) + }) + } + /// Perform a checkpoint (compact WAL into snapshot) #[napi] pub fn checkpoint(&self) -> Result<()> { @@ -1095,6 +1185,36 @@ impl napi::Task for OpenKiteTask { if let Some(threshold) = self.options.checkpoint_threshold { kite_opts.checkpoint_threshold = Some(threshold.clamp(0.0, 1.0)); } + if let Some(threshold) = self.options.close_checkpoint_if_wal_usage_at_least { + kite_opts.close_checkpoint_if_wal_usage_at_least = Some(threshold.clamp(0.0, 1.0)); + } + if let Some(role) = self.options.replication_role.take() { + kite_opts.replication_role = role.into(); + } + if let Some(path) = self.options.replication_sidecar_path.take() { + kite_opts.replication_sidecar_path = Some(path.into()); + } + if let Some(path) = self.options.replication_source_db_path.take() { + kite_opts.replication_source_db_path = Some(path.into()); + } + if let Some(path) = self.options.replication_source_sidecar_path.take() { + kite_opts.replication_source_sidecar_path = Some(path.into()); + } + if let Some(value) = 
self.options.replication_segment_max_bytes { + if value >= 0 { + kite_opts.replication_segment_max_bytes = Some(value as u64); + } + } + if let Some(value) = self.options.replication_retention_min_entries { + if value >= 0 { + kite_opts.replication_retention_min_entries = Some(value as u64); + } + } + if let Some(value) = self.options.replication_retention_min_ms { + if value >= 0 { + kite_opts.replication_retention_min_ms = Some(value as u64); + } + } for node in &self.options.nodes { let key_spec = Arc::new(parse_key_spec(&node.name, node.key.clone())?); diff --git a/ray-rs/src/napi_bindings/kite/types.rs b/ray-rs/src/napi_bindings/kite/types.rs index 97f00d3..667d4ce 100644 --- a/ray-rs/src/napi_bindings/kite/types.rs +++ b/ray-rs/src/napi_bindings/kite/types.rs @@ -6,7 +6,7 @@ use napi_derive::napi; use std::collections::HashMap; -use super::super::database::{JsPropValue, JsSyncMode}; +use super::super::database::{JsPropValue, JsReplicationRole, JsSyncMode}; // ============================================================================= // Schema Input Types @@ -88,4 +88,20 @@ pub struct JsKiteOptions { pub wal_size_mb: Option, /// WAL usage threshold (0.0-1.0) to trigger auto-checkpoint pub checkpoint_threshold: Option, + /// On close, checkpoint if WAL usage is at or above this threshold (default: 0.2) + pub close_checkpoint_if_wal_usage_at_least: Option, + /// Replication role: "Disabled", "Primary", or "Replica" + pub replication_role: Option, + /// Replication sidecar path override + pub replication_sidecar_path: Option, + /// Source primary db path (replica role only) + pub replication_source_db_path: Option, + /// Source primary sidecar path (replica role only) + pub replication_source_sidecar_path: Option, + /// Segment rotation threshold in bytes (primary role only) + pub replication_segment_max_bytes: Option, + /// Minimum retained entries window (primary role only) + pub replication_retention_min_entries: Option, + /// Minimum retained segment age 
in milliseconds (primary role only) + pub replication_retention_min_ms: Option, } diff --git a/ray-rs/src/pyo3_bindings/database.rs b/ray-rs/src/pyo3_bindings/database.rs index dbae208..c12c1aa 100644 --- a/ray-rs/src/pyo3_bindings/database.rs +++ b/ray-rs/src/pyo3_bindings/database.rs @@ -5,15 +5,20 @@ use pyo3::exceptions::PyRuntimeError; use pyo3::prelude::*; +use pyo3::types::{PyDict, PyList}; use std::path::PathBuf; +use std::str::FromStr; use std::sync::RwLock; +use crate::api::kite::KiteRuntimeProfile as RustKiteRuntimeProfile; use crate::backup as core_backup; use crate::core::single_file::{ - close_single_file, is_single_file_path, open_single_file, SingleFileDB as RustSingleFileDB, + close_single_file, close_single_file_with_options, is_single_file_path, open_single_file, + SingleFileCloseOptions as RustSingleFileCloseOptions, SingleFileDB as RustSingleFileDB, VacuumOptions as RustVacuumOptions, }; use crate::metrics as core_metrics; +use crate::replication::types::CommitToken; use crate::types::{ETypeId, EdgeWithProps as CoreEdgeWithProps, NodeId, PropKeyId}; // Import from modular structure @@ -23,8 +28,8 @@ use super::ops::{ }; use super::options::{ BackupOptions, BackupResult, ExportOptions, ExportResult, ImportOptions, ImportResult, - OfflineBackupOptions, OpenOptions, PaginationOptions, RestoreOptions, SingleFileOptimizeOptions, - StreamOptions, + OfflineBackupOptions, OpenOptions, PaginationOptions, RestoreOptions, RuntimeProfile, + SingleFileOptimizeOptions, StreamOptions, }; use super::stats::{CacheStats, CheckResult, DatabaseMetrics, DbStats, HealthCheckResult}; use super::traversal::{PyPathEdge, PyPathResult, PyTraversalResult}; @@ -178,6 +183,12 @@ impl PyDatabase { }) } + #[staticmethod] + #[pyo3(signature = (path, options=None))] + fn open(path: String, options: Option) -> PyResult { + Self::new(path, options) + } + fn close(&self) -> PyResult<()> { let mut guard = self .inner @@ -192,6 +203,24 @@ impl PyDatabase { Ok(()) } + 
#[pyo3(signature = (threshold))] + fn close_with_checkpoint_if_wal_over(&self, threshold: f64) -> PyResult<()> { + let mut guard = self + .inner + .write() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + if let Some(db) = guard.take() { + match db { + DatabaseInner::SingleFile(db) => close_single_file_with_options( + *db, + RustSingleFileCloseOptions::new().checkpoint_if_wal_usage_at_least(threshold), + ) + .map_err(|e| PyRuntimeError::new_err(format!("Failed to close: {e}")))?, + } + } + Ok(()) + } + fn __enter__(slf: PyRef<'_, Self>) -> PyResult> { Ok(slf) } @@ -268,6 +297,243 @@ impl PyDatabase { dispatch_ok!(self, |db| db.has_transaction(), |_db| false) } + /// Commit and return replication commit token (e.g. "2:41") when available. + fn commit_with_token(&self) -> PyResult> { + dispatch!( + self, + |db| db + .commit_with_token() + .map(|token| token.map(|value| value.to_string())) + .map_err(|e| PyRuntimeError::new_err(format!("Failed to commit: {e}"))), + |_db| { unreachable!("multi-file database support removed") } + ) + } + + /// Wait until this DB has observed at least the provided commit token. + fn wait_for_token(&self, token: String, timeout_ms: i64) -> PyResult { + if timeout_ms < 0 { + return Err(PyRuntimeError::new_err("timeout_ms must be non-negative")); + } + let token = CommitToken::from_str(&token) + .map_err(|e| PyRuntimeError::new_err(format!("Invalid token: {e}")))?; + dispatch!( + self, + |db| db + .wait_for_token(token, timeout_ms as u64) + .map_err(|e| PyRuntimeError::new_err(format!("Failed waiting for token: {e}"))), + |_db| { unreachable!("multi-file database support removed") } + ) + } + + /// Primary replication status dictionary when role=primary, else None. 
+ fn primary_replication_status(&self, py: Python<'_>) -> PyResult> { + let guard = self + .inner + .read() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + match guard.as_ref() { + Some(DatabaseInner::SingleFile(db)) => { + let Some(status) = db.primary_replication_status() else { + return Ok(None); + }; + + let out = PyDict::new_bound(py); + out.set_item("role", status.role.to_string())?; + out.set_item("epoch", status.epoch)?; + out.set_item("head_log_index", status.head_log_index)?; + out.set_item("retained_floor", status.retained_floor)?; + out.set_item( + "sidecar_path", + status.sidecar_path.to_string_lossy().to_string(), + )?; + out.set_item( + "last_token", + status.last_token.map(|token| token.to_string()), + )?; + out.set_item("append_attempts", status.append_attempts)?; + out.set_item("append_failures", status.append_failures)?; + out.set_item("append_successes", status.append_successes)?; + + let lags = PyList::empty_bound(py); + for lag in status.replica_lags { + let lag_item = PyDict::new_bound(py); + lag_item.set_item("replica_id", lag.replica_id)?; + lag_item.set_item("epoch", lag.epoch)?; + lag_item.set_item("applied_log_index", lag.applied_log_index)?; + lags.append(lag_item)?; + } + out.set_item("replica_lags", lags)?; + + Ok(Some(out.into_py(py))) + } + None => Err(PyRuntimeError::new_err("Database is closed")), + } + } + + /// Replica replication status dictionary when role=replica, else None. 
+ fn replica_replication_status(&self, py: Python<'_>) -> PyResult> { + let guard = self + .inner + .read() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + match guard.as_ref() { + Some(DatabaseInner::SingleFile(db)) => { + let Some(status) = db.replica_replication_status() else { + return Ok(None); + }; + + let out = PyDict::new_bound(py); + out.set_item("role", status.role.to_string())?; + out.set_item( + "source_db_path", + status + .source_db_path + .map(|path| path.to_string_lossy().to_string()), + )?; + out.set_item( + "source_sidecar_path", + status + .source_sidecar_path + .map(|path| path.to_string_lossy().to_string()), + )?; + out.set_item("applied_epoch", status.applied_epoch)?; + out.set_item("applied_log_index", status.applied_log_index)?; + out.set_item("last_error", status.last_error)?; + out.set_item("needs_reseed", status.needs_reseed)?; + Ok(Some(out.into_py(py))) + } + None => Err(PyRuntimeError::new_err("Database is closed")), + } + } + + /// Promote this primary to the next replication epoch. + fn primary_promote_to_next_epoch(&self) -> PyResult { + dispatch!( + self, + |db| db + .primary_promote_to_next_epoch() + .map(|value| value as i64) + .map_err(|e| PyRuntimeError::new_err(format!("Failed to promote primary: {e}"))), + |_db| { unreachable!("multi-file database support removed") } + ) + } + + /// Report replica progress cursor to primary. 
+ fn primary_report_replica_progress( + &self, + replica_id: String, + epoch: i64, + applied_log_index: i64, + ) -> PyResult<()> { + if epoch < 0 || applied_log_index < 0 { + return Err(PyRuntimeError::new_err( + "epoch and applied_log_index must be non-negative", + )); + } + dispatch!( + self, + |db| db + .primary_report_replica_progress(&replica_id, epoch as u64, applied_log_index as u64) + .map_err(|e| PyRuntimeError::new_err(format!("Failed to report replica progress: {e}"))), + |_db| { unreachable!("multi-file database support removed") } + ) + } + + /// Run primary retention and return (pruned_segments, retained_floor). + fn primary_run_retention(&self) -> PyResult<(i64, i64)> { + dispatch!( + self, + |db| db + .primary_run_retention() + .map(|outcome| ( + outcome.pruned_segments as i64, + outcome.retained_floor as i64 + )) + .map_err(|e| PyRuntimeError::new_err(format!("Failed to run retention: {e}"))), + |_db| { unreachable!("multi-file database support removed") } + ) + } + + /// Export latest primary snapshot metadata and optional bytes as transport JSON. + #[pyo3(signature = (include_data=false))] + fn export_replication_snapshot_transport_json(&self, include_data: bool) -> PyResult { + dispatch!( + self, + |db| db + .primary_export_snapshot_transport_json(include_data) + .map_err(|e| { + PyRuntimeError::new_err(format!("Failed to export replication snapshot: {e}")) + }), + |_db| { unreachable!("multi-file database support removed") } + ) + } + + /// Export primary replication log page (cursor + limits) as transport JSON. 
+ #[pyo3(signature = (cursor=None, max_frames=128, max_bytes=1048576, include_payload=true))] + fn export_replication_log_transport_json( + &self, + cursor: Option, + max_frames: i64, + max_bytes: i64, + include_payload: bool, + ) -> PyResult { + if max_frames <= 0 { + return Err(PyRuntimeError::new_err("max_frames must be positive")); + } + if max_bytes <= 0 { + return Err(PyRuntimeError::new_err("max_bytes must be positive")); + } + dispatch!( + self, + |db| db + .primary_export_log_transport_json( + cursor.as_deref(), + max_frames as usize, + max_bytes as usize, + include_payload, + ) + .map_err(|e| PyRuntimeError::new_err(format!("Failed to export replication log: {e}"))), + |_db| { unreachable!("multi-file database support removed") } + ) + } + + /// Bootstrap replica state from source snapshot. + fn replica_bootstrap_from_snapshot(&self) -> PyResult<()> { + dispatch!( + self, + |db| db + .replica_bootstrap_from_snapshot() + .map_err(|e| PyRuntimeError::new_err(format!("Failed to bootstrap replica: {e}"))), + |_db| { unreachable!("multi-file database support removed") } + ) + } + + /// Pull and apply at most max_frames frames on replica. + fn replica_catch_up_once(&self, max_frames: i64) -> PyResult { + if max_frames < 0 { + return Err(PyRuntimeError::new_err("max_frames must be non-negative")); + } + dispatch!( + self, + |db| db + .replica_catch_up_once(max_frames as usize) + .map(|count| count as i64) + .map_err(|e| PyRuntimeError::new_err(format!("Failed replica catch-up: {e}"))), + |_db| { unreachable!("multi-file database support removed") } + ) + } + + /// Force a replica reseed from source snapshot. 
+ fn replica_reseed_from_snapshot(&self) -> PyResult<()> { + dispatch!( + self, + |db| db + .replica_reseed_from_snapshot() + .map_err(|e| PyRuntimeError::new_err(format!("Failed to reseed replica: {e}"))), + |_db| { unreachable!("multi-file database support removed") } + ) + } + // ========================================================================== // Node Operations // ========================================================================== @@ -1528,6 +1794,21 @@ pub fn open_database(path: String, options: Option) -> PyResult RuntimeProfile { + RuntimeProfile::from_kite_runtime_profile(RustKiteRuntimeProfile::safe()) +} + +#[pyfunction] +pub fn recommended_balanced_profile() -> RuntimeProfile { + RuntimeProfile::from_kite_runtime_profile(RustKiteRuntimeProfile::balanced()) +} + +#[pyfunction] +pub fn recommended_reopen_heavy_profile() -> RuntimeProfile { + RuntimeProfile::from_kite_runtime_profile(RustKiteRuntimeProfile::reopen_heavy()) +} + #[pyfunction] pub fn collect_metrics(db: &PyDatabase) -> PyResult { let guard = db @@ -1542,6 +1823,660 @@ pub fn collect_metrics(db: &PyDatabase) -> PyResult { } } +#[pyfunction] +pub fn collect_replication_metrics_prometheus(db: &PyDatabase) -> PyResult { + let guard = db + .inner + .read() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + match guard.as_ref() { + Some(DatabaseInner::SingleFile(d)) => { + Ok(core_metrics::collect_replication_metrics_prometheus_single_file(d)) + } + None => Err(PyRuntimeError::new_err("Database is closed")), + } +} + +#[pyfunction] +pub fn collect_replication_metrics_otel_json(db: &PyDatabase) -> PyResult { + let guard = db + .inner + .read() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + match guard.as_ref() { + Some(DatabaseInner::SingleFile(d)) => { + Ok(core_metrics::collect_replication_metrics_otel_json_single_file(d)) + } + None => Err(PyRuntimeError::new_err("Database is closed")), + } +} + +#[pyfunction] +pub fn 
collect_replication_metrics_otel_protobuf(db: &PyDatabase) -> PyResult> { + let guard = db + .inner + .read() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + match guard.as_ref() { + Some(DatabaseInner::SingleFile(d)) => { + Ok(core_metrics::collect_replication_metrics_otel_protobuf_single_file(d)) + } + None => Err(PyRuntimeError::new_err("Database is closed")), + } +} + +#[pyfunction] +#[pyo3(signature = (db, include_data=false))] +pub fn collect_replication_snapshot_transport_json( + db: &PyDatabase, + include_data: bool, +) -> PyResult { + let guard = db + .inner + .read() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + match guard.as_ref() { + Some(DatabaseInner::SingleFile(d)) => d + .primary_export_snapshot_transport_json(include_data) + .map_err(|e| PyRuntimeError::new_err(format!("Failed to export replication snapshot: {e}"))), + None => Err(PyRuntimeError::new_err("Database is closed")), + } +} + +#[pyfunction] +#[pyo3(signature = (db, cursor=None, max_frames=128, max_bytes=1048576, include_payload=true))] +pub fn collect_replication_log_transport_json( + db: &PyDatabase, + cursor: Option, + max_frames: i64, + max_bytes: i64, + include_payload: bool, +) -> PyResult { + if max_frames <= 0 { + return Err(PyRuntimeError::new_err("max_frames must be positive")); + } + if max_bytes <= 0 { + return Err(PyRuntimeError::new_err("max_bytes must be positive")); + } + + let guard = db + .inner + .read() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + match guard.as_ref() { + Some(DatabaseInner::SingleFile(d)) => d + .primary_export_log_transport_json( + cursor.as_deref(), + max_frames as usize, + max_bytes as usize, + include_payload, + ) + .map_err(|e| PyRuntimeError::new_err(format!("Failed to export replication log: {e}"))), + None => Err(PyRuntimeError::new_err("Database is closed")), + } +} + +#[allow(clippy::too_many_arguments)] +fn build_otel_push_options_py( + timeout_ms: i64, + bearer_token: Option, + retry_max_attempts: 
i64, + retry_backoff_ms: i64, + retry_backoff_max_ms: i64, + retry_jitter_ratio: f64, + adaptive_retry: bool, + adaptive_retry_mode: Option, + adaptive_retry_ewma_alpha: f64, + circuit_breaker_failure_threshold: i64, + circuit_breaker_open_ms: i64, + circuit_breaker_half_open_probes: i64, + circuit_breaker_state_path: Option, + circuit_breaker_state_url: Option, + circuit_breaker_state_patch: bool, + circuit_breaker_state_patch_batch: bool, + circuit_breaker_state_patch_batch_max_keys: i64, + circuit_breaker_state_patch_merge: bool, + circuit_breaker_state_patch_merge_max_keys: i64, + circuit_breaker_state_patch_retry_max_attempts: i64, + circuit_breaker_state_cas: bool, + circuit_breaker_state_lease_id: Option, + circuit_breaker_scope_key: Option, + compression_gzip: bool, + https_only: bool, + ca_cert_pem_path: Option, + client_cert_pem_path: Option, + client_key_pem_path: Option, +) -> PyResult { + if timeout_ms <= 0 { + return Err(PyRuntimeError::new_err("timeout_ms must be positive")); + } + if retry_max_attempts <= 0 { + return Err(PyRuntimeError::new_err( + "retry_max_attempts must be positive", + )); + } + if retry_backoff_ms < 0 { + return Err(PyRuntimeError::new_err( + "retry_backoff_ms must be non-negative", + )); + } + if retry_backoff_max_ms < 0 { + return Err(PyRuntimeError::new_err( + "retry_backoff_max_ms must be non-negative", + )); + } + if retry_backoff_max_ms > 0 && retry_backoff_max_ms < retry_backoff_ms { + return Err(PyRuntimeError::new_err( + "retry_backoff_max_ms must be >= retry_backoff_ms when non-zero", + )); + } + if !(0.0..=1.0).contains(&retry_jitter_ratio) { + return Err(PyRuntimeError::new_err( + "retry_jitter_ratio must be within [0.0, 1.0]", + )); + } + let adaptive_retry_mode = match adaptive_retry_mode + .as_deref() + .map(str::trim) + .filter(|value| !value.is_empty()) + { + None => core_metrics::OtlpAdaptiveRetryMode::Linear, + Some(value) if value.eq_ignore_ascii_case("linear") => { + 
core_metrics::OtlpAdaptiveRetryMode::Linear + } + Some(value) if value.eq_ignore_ascii_case("ewma") => core_metrics::OtlpAdaptiveRetryMode::Ewma, + Some(_) => { + return Err(PyRuntimeError::new_err( + "adaptive_retry_mode must be one of: linear, ewma", + )); + } + }; + if !(0.0..=1.0).contains(&adaptive_retry_ewma_alpha) { + return Err(PyRuntimeError::new_err( + "adaptive_retry_ewma_alpha must be within [0.0, 1.0]", + )); + } + if circuit_breaker_failure_threshold < 0 { + return Err(PyRuntimeError::new_err( + "circuit_breaker_failure_threshold must be non-negative", + )); + } + if circuit_breaker_open_ms < 0 { + return Err(PyRuntimeError::new_err( + "circuit_breaker_open_ms must be non-negative", + )); + } + if circuit_breaker_failure_threshold > 0 && circuit_breaker_open_ms == 0 { + return Err(PyRuntimeError::new_err( + "circuit_breaker_open_ms must be > 0 when circuit_breaker_failure_threshold is enabled", + )); + } + if circuit_breaker_half_open_probes < 0 { + return Err(PyRuntimeError::new_err( + "circuit_breaker_half_open_probes must be non-negative", + )); + } + if circuit_breaker_failure_threshold > 0 && circuit_breaker_half_open_probes == 0 { + return Err(PyRuntimeError::new_err( + "circuit_breaker_half_open_probes must be > 0 when circuit_breaker_failure_threshold is enabled", + )); + } + if let Some(path) = circuit_breaker_state_path.as_deref() { + if path.trim().is_empty() { + return Err(PyRuntimeError::new_err( + "circuit_breaker_state_path must not be empty when provided", + )); + } + } + if let Some(url) = circuit_breaker_state_url.as_deref() { + let trimmed = url.trim(); + if trimmed.is_empty() { + return Err(PyRuntimeError::new_err( + "circuit_breaker_state_url must not be empty when provided", + )); + } + if !(trimmed.starts_with("http://") || trimmed.starts_with("https://")) { + return Err(PyRuntimeError::new_err( + "circuit_breaker_state_url must use http:// or https://", + )); + } + if https_only && trimmed.starts_with("http://") { + return 
Err(PyRuntimeError::new_err( + "circuit_breaker_state_url must use https when https_only is enabled", + )); + } + } + if circuit_breaker_state_path.is_some() && circuit_breaker_state_url.is_some() { + return Err(PyRuntimeError::new_err( + "circuit_breaker_state_path and circuit_breaker_state_url are mutually exclusive", + )); + } + if circuit_breaker_state_patch && circuit_breaker_state_url.is_none() { + return Err(PyRuntimeError::new_err( + "circuit_breaker_state_patch requires circuit_breaker_state_url", + )); + } + if circuit_breaker_state_patch_batch && !circuit_breaker_state_patch { + return Err(PyRuntimeError::new_err( + "circuit_breaker_state_patch_batch requires circuit_breaker_state_patch", + )); + } + if circuit_breaker_state_patch_merge && !circuit_breaker_state_patch { + return Err(PyRuntimeError::new_err( + "circuit_breaker_state_patch_merge requires circuit_breaker_state_patch", + )); + } + if circuit_breaker_state_patch_batch_max_keys <= 0 { + return Err(PyRuntimeError::new_err( + "circuit_breaker_state_patch_batch_max_keys must be > 0", + )); + } + if circuit_breaker_state_patch_merge_max_keys <= 0 { + return Err(PyRuntimeError::new_err( + "circuit_breaker_state_patch_merge_max_keys must be > 0", + )); + } + if circuit_breaker_state_patch_retry_max_attempts <= 0 { + return Err(PyRuntimeError::new_err( + "circuit_breaker_state_patch_retry_max_attempts must be > 0", + )); + } + if circuit_breaker_state_cas && circuit_breaker_state_url.is_none() { + return Err(PyRuntimeError::new_err( + "circuit_breaker_state_cas requires circuit_breaker_state_url", + )); + } + if let Some(lease_id) = circuit_breaker_state_lease_id.as_deref() { + if lease_id.trim().is_empty() { + return Err(PyRuntimeError::new_err( + "circuit_breaker_state_lease_id must not be empty when provided", + )); + } + if circuit_breaker_state_url.is_none() { + return Err(PyRuntimeError::new_err( + "circuit_breaker_state_lease_id requires circuit_breaker_state_url", + )); + } + } + if let 
Some(scope_key) = circuit_breaker_scope_key.as_deref() { + if scope_key.trim().is_empty() { + return Err(PyRuntimeError::new_err( + "circuit_breaker_scope_key must not be empty when provided", + )); + } + } + + Ok(core_metrics::OtlpHttpPushOptions { + timeout_ms: timeout_ms as u64, + bearer_token, + retry_max_attempts: retry_max_attempts as u32, + retry_backoff_ms: retry_backoff_ms as u64, + retry_backoff_max_ms: retry_backoff_max_ms as u64, + retry_jitter_ratio, + adaptive_retry_mode, + adaptive_retry_ewma_alpha, + adaptive_retry, + circuit_breaker_failure_threshold: circuit_breaker_failure_threshold as u32, + circuit_breaker_open_ms: circuit_breaker_open_ms as u64, + circuit_breaker_half_open_probes: circuit_breaker_half_open_probes as u32, + circuit_breaker_state_path, + circuit_breaker_state_url, + circuit_breaker_state_patch, + circuit_breaker_state_patch_batch, + circuit_breaker_state_patch_batch_max_keys: circuit_breaker_state_patch_batch_max_keys as u32, + circuit_breaker_state_patch_merge, + circuit_breaker_state_patch_merge_max_keys: circuit_breaker_state_patch_merge_max_keys as u32, + circuit_breaker_state_patch_retry_max_attempts: circuit_breaker_state_patch_retry_max_attempts + as u32, + circuit_breaker_state_cas, + circuit_breaker_state_lease_id, + circuit_breaker_scope_key, + compression_gzip, + tls: core_metrics::OtlpHttpTlsOptions { + https_only, + ca_cert_pem_path, + client_cert_pem_path, + client_key_pem_path, + }, + }) +} + +#[pyfunction] +#[pyo3(signature = ( + db, + endpoint, + timeout_ms=5000, + bearer_token=None, + retry_max_attempts=1, + retry_backoff_ms=100, + retry_backoff_max_ms=2000, + retry_jitter_ratio=0.0, + adaptive_retry=false, + adaptive_retry_mode=None, + adaptive_retry_ewma_alpha=0.3, + circuit_breaker_failure_threshold=0, + circuit_breaker_open_ms=0, + circuit_breaker_half_open_probes=1, + circuit_breaker_state_path=None, + circuit_breaker_state_url=None, + circuit_breaker_state_patch=false, + 
circuit_breaker_state_patch_batch=false, + circuit_breaker_state_patch_batch_max_keys=8, + circuit_breaker_state_patch_merge=false, + circuit_breaker_state_patch_merge_max_keys=32, + circuit_breaker_state_patch_retry_max_attempts=1, + circuit_breaker_state_cas=false, + circuit_breaker_state_lease_id=None, + circuit_breaker_scope_key=None, + compression_gzip=false, + https_only=false, + ca_cert_pem_path=None, + client_cert_pem_path=None, + client_key_pem_path=None +))] +pub fn push_replication_metrics_otel_json( + db: &PyDatabase, + endpoint: String, + timeout_ms: i64, + bearer_token: Option, + retry_max_attempts: i64, + retry_backoff_ms: i64, + retry_backoff_max_ms: i64, + retry_jitter_ratio: f64, + adaptive_retry: bool, + adaptive_retry_mode: Option, + adaptive_retry_ewma_alpha: f64, + circuit_breaker_failure_threshold: i64, + circuit_breaker_open_ms: i64, + circuit_breaker_half_open_probes: i64, + circuit_breaker_state_path: Option, + circuit_breaker_state_url: Option, + circuit_breaker_state_patch: bool, + circuit_breaker_state_patch_batch: bool, + circuit_breaker_state_patch_batch_max_keys: i64, + circuit_breaker_state_patch_merge: bool, + circuit_breaker_state_patch_merge_max_keys: i64, + circuit_breaker_state_patch_retry_max_attempts: i64, + circuit_breaker_state_cas: bool, + circuit_breaker_state_lease_id: Option, + circuit_breaker_scope_key: Option, + compression_gzip: bool, + https_only: bool, + ca_cert_pem_path: Option, + client_cert_pem_path: Option, + client_key_pem_path: Option, +) -> PyResult<(i64, String)> { + let options = build_otel_push_options_py( + timeout_ms, + bearer_token, + retry_max_attempts, + retry_backoff_ms, + retry_backoff_max_ms, + retry_jitter_ratio, + adaptive_retry, + adaptive_retry_mode, + adaptive_retry_ewma_alpha, + circuit_breaker_failure_threshold, + circuit_breaker_open_ms, + circuit_breaker_half_open_probes, + circuit_breaker_state_path, + circuit_breaker_state_url, + circuit_breaker_state_patch, + 
circuit_breaker_state_patch_batch, + circuit_breaker_state_patch_batch_max_keys, + circuit_breaker_state_patch_merge, + circuit_breaker_state_patch_merge_max_keys, + circuit_breaker_state_patch_retry_max_attempts, + circuit_breaker_state_cas, + circuit_breaker_state_lease_id, + circuit_breaker_scope_key, + compression_gzip, + https_only, + ca_cert_pem_path, + client_cert_pem_path, + client_key_pem_path, + )?; + + let guard = db + .inner + .read() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + match guard.as_ref() { + Some(DatabaseInner::SingleFile(d)) => { + let result = core_metrics::push_replication_metrics_otel_json_single_file_with_options( + d, &endpoint, &options, + ) + .map_err(|e| PyRuntimeError::new_err(format!("Failed to push replication metrics: {e}")))?; + Ok((result.status_code, result.response_body)) + } + None => Err(PyRuntimeError::new_err("Database is closed")), + } +} + +#[pyfunction] +#[pyo3(signature = ( + db, + endpoint, + timeout_ms=5000, + bearer_token=None, + retry_max_attempts=1, + retry_backoff_ms=100, + retry_backoff_max_ms=2000, + retry_jitter_ratio=0.0, + adaptive_retry=false, + adaptive_retry_mode=None, + adaptive_retry_ewma_alpha=0.3, + circuit_breaker_failure_threshold=0, + circuit_breaker_open_ms=0, + circuit_breaker_half_open_probes=1, + circuit_breaker_state_path=None, + circuit_breaker_state_url=None, + circuit_breaker_state_patch=false, + circuit_breaker_state_patch_batch=false, + circuit_breaker_state_patch_batch_max_keys=8, + circuit_breaker_state_patch_merge=false, + circuit_breaker_state_patch_merge_max_keys=32, + circuit_breaker_state_patch_retry_max_attempts=1, + circuit_breaker_state_cas=false, + circuit_breaker_state_lease_id=None, + circuit_breaker_scope_key=None, + compression_gzip=false, + https_only=false, + ca_cert_pem_path=None, + client_cert_pem_path=None, + client_key_pem_path=None +))] +pub fn push_replication_metrics_otel_protobuf( + db: &PyDatabase, + endpoint: String, + timeout_ms: i64, + 
bearer_token: Option, + retry_max_attempts: i64, + retry_backoff_ms: i64, + retry_backoff_max_ms: i64, + retry_jitter_ratio: f64, + adaptive_retry: bool, + adaptive_retry_mode: Option, + adaptive_retry_ewma_alpha: f64, + circuit_breaker_failure_threshold: i64, + circuit_breaker_open_ms: i64, + circuit_breaker_half_open_probes: i64, + circuit_breaker_state_path: Option, + circuit_breaker_state_url: Option, + circuit_breaker_state_patch: bool, + circuit_breaker_state_patch_batch: bool, + circuit_breaker_state_patch_batch_max_keys: i64, + circuit_breaker_state_patch_merge: bool, + circuit_breaker_state_patch_merge_max_keys: i64, + circuit_breaker_state_patch_retry_max_attempts: i64, + circuit_breaker_state_cas: bool, + circuit_breaker_state_lease_id: Option, + circuit_breaker_scope_key: Option, + compression_gzip: bool, + https_only: bool, + ca_cert_pem_path: Option, + client_cert_pem_path: Option, + client_key_pem_path: Option, +) -> PyResult<(i64, String)> { + let options = build_otel_push_options_py( + timeout_ms, + bearer_token, + retry_max_attempts, + retry_backoff_ms, + retry_backoff_max_ms, + retry_jitter_ratio, + adaptive_retry, + adaptive_retry_mode, + adaptive_retry_ewma_alpha, + circuit_breaker_failure_threshold, + circuit_breaker_open_ms, + circuit_breaker_half_open_probes, + circuit_breaker_state_path, + circuit_breaker_state_url, + circuit_breaker_state_patch, + circuit_breaker_state_patch_batch, + circuit_breaker_state_patch_batch_max_keys, + circuit_breaker_state_patch_merge, + circuit_breaker_state_patch_merge_max_keys, + circuit_breaker_state_patch_retry_max_attempts, + circuit_breaker_state_cas, + circuit_breaker_state_lease_id, + circuit_breaker_scope_key, + compression_gzip, + https_only, + ca_cert_pem_path, + client_cert_pem_path, + client_key_pem_path, + )?; + + let guard = db + .inner + .read() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + match guard.as_ref() { + Some(DatabaseInner::SingleFile(d)) => { + let result = 
core_metrics::push_replication_metrics_otel_protobuf_single_file_with_options( + d, &endpoint, &options, + ) + .map_err(|e| PyRuntimeError::new_err(format!("Failed to push replication metrics: {e}")))?; + Ok((result.status_code, result.response_body)) + } + None => Err(PyRuntimeError::new_err("Database is closed")), + } +} + +#[pyfunction] +#[pyo3(signature = ( + db, + endpoint, + timeout_ms=5000, + bearer_token=None, + retry_max_attempts=1, + retry_backoff_ms=100, + retry_backoff_max_ms=2000, + retry_jitter_ratio=0.0, + adaptive_retry=false, + adaptive_retry_mode=None, + adaptive_retry_ewma_alpha=0.3, + circuit_breaker_failure_threshold=0, + circuit_breaker_open_ms=0, + circuit_breaker_half_open_probes=1, + circuit_breaker_state_path=None, + circuit_breaker_state_url=None, + circuit_breaker_state_patch=false, + circuit_breaker_state_patch_batch=false, + circuit_breaker_state_patch_batch_max_keys=8, + circuit_breaker_state_patch_merge=false, + circuit_breaker_state_patch_merge_max_keys=32, + circuit_breaker_state_patch_retry_max_attempts=1, + circuit_breaker_state_cas=false, + circuit_breaker_state_lease_id=None, + circuit_breaker_scope_key=None, + compression_gzip=false, + https_only=false, + ca_cert_pem_path=None, + client_cert_pem_path=None, + client_key_pem_path=None +))] +pub fn push_replication_metrics_otel_grpc( + db: &PyDatabase, + endpoint: String, + timeout_ms: i64, + bearer_token: Option, + retry_max_attempts: i64, + retry_backoff_ms: i64, + retry_backoff_max_ms: i64, + retry_jitter_ratio: f64, + adaptive_retry: bool, + adaptive_retry_mode: Option, + adaptive_retry_ewma_alpha: f64, + circuit_breaker_failure_threshold: i64, + circuit_breaker_open_ms: i64, + circuit_breaker_half_open_probes: i64, + circuit_breaker_state_path: Option, + circuit_breaker_state_url: Option, + circuit_breaker_state_patch: bool, + circuit_breaker_state_patch_batch: bool, + circuit_breaker_state_patch_batch_max_keys: i64, + circuit_breaker_state_patch_merge: bool, + 
circuit_breaker_state_patch_merge_max_keys: i64, + circuit_breaker_state_patch_retry_max_attempts: i64, + circuit_breaker_state_cas: bool, + circuit_breaker_state_lease_id: Option, + circuit_breaker_scope_key: Option, + compression_gzip: bool, + https_only: bool, + ca_cert_pem_path: Option, + client_cert_pem_path: Option, + client_key_pem_path: Option, +) -> PyResult<(i64, String)> { + let options = build_otel_push_options_py( + timeout_ms, + bearer_token, + retry_max_attempts, + retry_backoff_ms, + retry_backoff_max_ms, + retry_jitter_ratio, + adaptive_retry, + adaptive_retry_mode, + adaptive_retry_ewma_alpha, + circuit_breaker_failure_threshold, + circuit_breaker_open_ms, + circuit_breaker_half_open_probes, + circuit_breaker_state_path, + circuit_breaker_state_url, + circuit_breaker_state_patch, + circuit_breaker_state_patch_batch, + circuit_breaker_state_patch_batch_max_keys, + circuit_breaker_state_patch_merge, + circuit_breaker_state_patch_merge_max_keys, + circuit_breaker_state_patch_retry_max_attempts, + circuit_breaker_state_cas, + circuit_breaker_state_lease_id, + circuit_breaker_scope_key, + compression_gzip, + https_only, + ca_cert_pem_path, + client_cert_pem_path, + client_key_pem_path, + )?; + + let guard = db + .inner + .read() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + match guard.as_ref() { + Some(DatabaseInner::SingleFile(d)) => { + let result = core_metrics::push_replication_metrics_otel_grpc_single_file_with_options( + d, &endpoint, &options, + ) + .map_err(|e| PyRuntimeError::new_err(format!("Failed to push replication metrics: {e}")))?; + Ok((result.status_code, result.response_body)) + } + None => Err(PyRuntimeError::new_err("Database is closed")), + } +} + #[pyfunction] pub fn health_check(db: &PyDatabase) -> PyResult { let guard = db diff --git a/ray-rs/src/pyo3_bindings/mod.rs b/ray-rs/src/pyo3_bindings/mod.rs index fe385cb..09f2ad5 100644 --- a/ray-rs/src/pyo3_bindings/mod.rs +++ b/ray-rs/src/pyo3_bindings/mod.rs @@ -55,6 
+55,7 @@ pub fn kitedb(m: &Bound<'_, PyModule>) -> PyResult<()> { // Options classes m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; @@ -76,6 +77,9 @@ pub fn kitedb(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; @@ -113,7 +117,45 @@ pub fn kitedb(m: &Bound<'_, PyModule>) -> PyResult<()> { // Standalone functions m.add_function(wrap_pyfunction!(database::open_database, m)?)?; + m.add_function(wrap_pyfunction!(database::recommended_safe_profile, m)?)?; + m.add_function(wrap_pyfunction!(database::recommended_balanced_profile, m)?)?; + m.add_function(wrap_pyfunction!( + database::recommended_reopen_heavy_profile, + m + )?)?; m.add_function(wrap_pyfunction!(database::collect_metrics, m)?)?; + m.add_function(wrap_pyfunction!( + database::collect_replication_metrics_prometheus, + m + )?)?; + m.add_function(wrap_pyfunction!( + database::collect_replication_metrics_otel_json, + m + )?)?; + m.add_function(wrap_pyfunction!( + database::collect_replication_metrics_otel_protobuf, + m + )?)?; + m.add_function(wrap_pyfunction!( + database::collect_replication_snapshot_transport_json, + m + )?)?; + m.add_function(wrap_pyfunction!( + database::collect_replication_log_transport_json, + m + )?)?; + m.add_function(wrap_pyfunction!( + database::push_replication_metrics_otel_json, + m + )?)?; + m.add_function(wrap_pyfunction!( + database::push_replication_metrics_otel_protobuf, + m + )?)?; + m.add_function(wrap_pyfunction!( + database::push_replication_metrics_otel_grpc, + m + )?)?; m.add_function(wrap_pyfunction!(database::health_check, m)?)?; m.add_function(wrap_pyfunction!(database::create_backup, m)?)?; m.add_function(wrap_pyfunction!(database::restore_backup, m)?)?; diff --git a/ray-rs/src/pyo3_bindings/options/mod.rs b/ray-rs/src/pyo3_bindings/options/mod.rs index 
88e8259..e1c07b8 100644 --- a/ray-rs/src/pyo3_bindings/options/mod.rs +++ b/ray-rs/src/pyo3_bindings/options/mod.rs @@ -17,5 +17,5 @@ pub mod streaming; pub use backup::{BackupOptions, BackupResult, OfflineBackupOptions, RestoreOptions}; pub use export::{ExportOptions, ExportResult, ImportOptions, ImportResult}; pub use maintenance::{CompressionOptions, SingleFileOptimizeOptions, VacuumOptions}; -pub use open::{OpenOptions, SnapshotParseMode, SyncMode}; +pub use open::{OpenOptions, RuntimeProfile, SnapshotParseMode, SyncMode}; pub use streaming::{PaginationOptions, StreamOptions}; diff --git a/ray-rs/src/pyo3_bindings/options/open.rs b/ray-rs/src/pyo3_bindings/options/open.rs index 3082aeb..35c7e33 100644 --- a/ray-rs/src/pyo3_bindings/options/open.rs +++ b/ray-rs/src/pyo3_bindings/options/open.rs @@ -1,12 +1,18 @@ //! Database open options for Python bindings use super::maintenance::CompressionOptions; +use crate::api::kite::{ + KiteOptions as RustKiteOptions, KiteRuntimeProfile as RustKiteRuntimeProfile, +}; use crate::core::single_file::{ SingleFileOpenOptions as RustOpenOptions, SnapshotParseMode as RustSnapshotParseMode, SyncMode as RustSyncMode, }; +use crate::replication::types::ReplicationRole; use crate::types::{CacheOptions, PropertyCacheConfig, QueryCacheConfig, TraversalCacheConfig}; +use pyo3::exceptions::PyValueError; use pyo3::prelude::*; +use std::str::FromStr; /// Synchronization mode for WAL writes /// @@ -164,6 +170,27 @@ pub struct OpenOptions { /// Snapshot parse mode: "strict" or "salvage" (single-file only) #[pyo3(get, set)] pub snapshot_parse_mode: Option, + /// Replication role: "disabled", "primary", or "replica" + #[pyo3(get, set)] + pub replication_role: Option, + /// Replication sidecar path override + #[pyo3(get, set)] + pub replication_sidecar_path: Option, + /// Source primary db path (replica role only) + #[pyo3(get, set)] + pub replication_source_db_path: Option, + /// Source primary sidecar path (replica role only) + #[pyo3(get, 
set)] + pub replication_source_sidecar_path: Option, + /// Segment rotation threshold in bytes (primary role only) + #[pyo3(get, set)] + pub replication_segment_max_bytes: Option, + /// Minimum retained entries window (primary role only) + #[pyo3(get, set)] + pub replication_retention_min_entries: Option, + /// Minimum retained segment age in milliseconds (primary role only) + #[pyo3(get, set)] + pub replication_retention_min_ms: Option, } #[pymethods] @@ -192,7 +219,14 @@ impl OpenOptions { sync_mode=None, group_commit_enabled=None, group_commit_window_ms=None, - snapshot_parse_mode=None + snapshot_parse_mode=None, + replication_role=None, + replication_sidecar_path=None, + replication_source_db_path=None, + replication_source_sidecar_path=None, + replication_segment_max_bytes=None, + replication_retention_min_entries=None, + replication_retention_min_ms=None ))] #[allow(clippy::too_many_arguments)] fn new( @@ -219,6 +253,13 @@ impl OpenOptions { group_commit_enabled: Option, group_commit_window_ms: Option, snapshot_parse_mode: Option, + replication_role: Option, + replication_sidecar_path: Option, + replication_source_db_path: Option, + replication_source_sidecar_path: Option, + replication_segment_max_bytes: Option, + replication_retention_min_entries: Option, + replication_retention_min_ms: Option, ) -> Self { Self { read_only, @@ -244,6 +285,13 @@ impl OpenOptions { group_commit_enabled, group_commit_window_ms, snapshot_parse_mode, + replication_role, + replication_sidecar_path, + replication_source_db_path, + replication_source_sidecar_path, + replication_segment_max_bytes, + replication_retention_min_entries, + replication_retention_min_ms, } } @@ -344,9 +392,138 @@ impl OpenOptions { if let Some(mode) = self.snapshot_parse_mode { rust_opts = rust_opts.snapshot_parse_mode(mode.mode); } + if let Some(ref role) = self.replication_role { + let role = ReplicationRole::from_str(role).map_err(|error| { + PyValueError::new_err(format!("Invalid replication_role 
'{role}': {error}")) + })?; + rust_opts = rust_opts.replication_role(role); + } + if let Some(ref path) = self.replication_sidecar_path { + rust_opts = rust_opts.replication_sidecar_path(path); + } + if let Some(ref path) = self.replication_source_db_path { + rust_opts = rust_opts.replication_source_db_path(path); + } + if let Some(ref path) = self.replication_source_sidecar_path { + rust_opts = rust_opts.replication_source_sidecar_path(path); + } + if let Some(value) = self.replication_segment_max_bytes { + if value < 0 { + return Err(PyValueError::new_err( + "replication_segment_max_bytes must be non-negative", + )); + } + rust_opts = rust_opts.replication_segment_max_bytes(value as u64); + } + if let Some(value) = self.replication_retention_min_entries { + if value < 0 { + return Err(PyValueError::new_err( + "replication_retention_min_entries must be non-negative", + )); + } + rust_opts = rust_opts.replication_retention_min_entries(value as u64); + } + if let Some(value) = self.replication_retention_min_ms { + if value < 0 { + return Err(PyValueError::new_err( + "replication_retention_min_ms must be non-negative", + )); + } + rust_opts = rust_opts.replication_retention_min_ms(value as u64); + } Ok(rust_opts) } + + /// Build binding open options from high-level Kite profile options. 
+ pub fn from_kite_options(opts: RustKiteOptions) -> Self { + let replication_role = match opts.replication_role { + ReplicationRole::Disabled => "disabled", + ReplicationRole::Primary => "primary", + ReplicationRole::Replica => "replica", + } + .to_string(); + + Self { + read_only: Some(opts.read_only), + create_if_missing: Some(opts.create_if_missing), + mvcc: Some(opts.mvcc), + mvcc_gc_interval_ms: opts.mvcc_gc_interval_ms.and_then(|v| i64::try_from(v).ok()), + mvcc_retention_ms: opts.mvcc_retention_ms.and_then(|v| i64::try_from(v).ok()), + mvcc_max_chain_depth: opts + .mvcc_max_chain_depth + .and_then(|v| i64::try_from(v).ok()), + page_size: None, + wal_size: opts.wal_size.and_then(|v| u32::try_from(v).ok()), + auto_checkpoint: None, + checkpoint_threshold: opts.checkpoint_threshold, + background_checkpoint: None, + checkpoint_compression: None, + cache_snapshot: None, + cache_enabled: None, + cache_max_node_props: None, + cache_max_edge_props: None, + cache_max_traversal_entries: None, + cache_max_query_entries: None, + cache_query_ttl_ms: None, + sync_mode: Some(SyncMode { + mode: opts.sync_mode, + }), + group_commit_enabled: Some(opts.group_commit_enabled), + group_commit_window_ms: i64::try_from(opts.group_commit_window_ms).ok(), + snapshot_parse_mode: None, + replication_role: Some(replication_role), + replication_sidecar_path: opts + .replication_sidecar_path + .map(|p| p.to_string_lossy().to_string()), + replication_source_db_path: opts + .replication_source_db_path + .map(|p| p.to_string_lossy().to_string()), + replication_source_sidecar_path: opts + .replication_source_sidecar_path + .map(|p| p.to_string_lossy().to_string()), + replication_segment_max_bytes: opts + .replication_segment_max_bytes + .and_then(|v| i64::try_from(v).ok()), + replication_retention_min_entries: opts + .replication_retention_min_entries + .and_then(|v| i64::try_from(v).ok()), + replication_retention_min_ms: opts + .replication_retention_min_ms + .and_then(|v| 
i64::try_from(v).ok()), + } + } +} + +/// Runtime profile preset for open/close behavior. +#[pyclass(name = "RuntimeProfile")] +#[derive(Debug, Clone)] +pub struct RuntimeProfile { + /// Open-time options for Database(path, options) + #[pyo3(get, set)] + pub open_options: OpenOptions, + /// Optional close-time checkpoint threshold + #[pyo3(get, set)] + pub close_checkpoint_if_wal_usage_at_least: Option, +} + +#[pymethods] +impl RuntimeProfile { + fn __repr__(&self) -> String { + format!( + "RuntimeProfile(close_checkpoint_if_wal_usage_at_least={:?})", + self.close_checkpoint_if_wal_usage_at_least + ) + } +} + +impl RuntimeProfile { + pub fn from_kite_runtime_profile(profile: RustKiteRuntimeProfile) -> Self { + Self { + open_options: OpenOptions::from_kite_options(profile.options), + close_checkpoint_if_wal_usage_at_least: profile.close_checkpoint_if_wal_usage_at_least, + } + } } #[cfg(test)] diff --git a/ray-rs/src/pyo3_bindings/stats/metrics.rs b/ray-rs/src/pyo3_bindings/stats/metrics.rs index 616e8e0..1108c25 100644 --- a/ray-rs/src/pyo3_bindings/stats/metrics.rs +++ b/ray-rs/src/pyo3_bindings/stats/metrics.rs @@ -184,6 +184,135 @@ impl From for MvccMetrics { } } +/// Primary replication metrics +#[pyclass(name = "PrimaryReplicationMetrics")] +#[derive(Debug, Clone)] +pub struct PrimaryReplicationMetrics { + #[pyo3(get)] + pub epoch: i64, + #[pyo3(get)] + pub head_log_index: i64, + #[pyo3(get)] + pub retained_floor: i64, + #[pyo3(get)] + pub replica_count: i64, + #[pyo3(get)] + pub stale_epoch_replica_count: i64, + #[pyo3(get)] + pub max_replica_lag: i64, + #[pyo3(get)] + pub min_replica_applied_log_index: Option, + #[pyo3(get)] + pub sidecar_path: String, + #[pyo3(get)] + pub last_token: Option, + #[pyo3(get)] + pub append_attempts: i64, + #[pyo3(get)] + pub append_failures: i64, + #[pyo3(get)] + pub append_successes: i64, +} + +#[pymethods] +impl PrimaryReplicationMetrics { + fn __repr__(&self) -> String { + format!( + "PrimaryReplicationMetrics(epoch={}, 
head={}, retained_floor={}, replicas={})", + self.epoch, self.head_log_index, self.retained_floor, self.replica_count + ) + } +} + +impl From for PrimaryReplicationMetrics { + fn from(metrics: core_metrics::PrimaryReplicationMetrics) -> Self { + PrimaryReplicationMetrics { + epoch: metrics.epoch, + head_log_index: metrics.head_log_index, + retained_floor: metrics.retained_floor, + replica_count: metrics.replica_count, + stale_epoch_replica_count: metrics.stale_epoch_replica_count, + max_replica_lag: metrics.max_replica_lag, + min_replica_applied_log_index: metrics.min_replica_applied_log_index, + sidecar_path: metrics.sidecar_path, + last_token: metrics.last_token, + append_attempts: metrics.append_attempts, + append_failures: metrics.append_failures, + append_successes: metrics.append_successes, + } + } +} + +/// Replica replication metrics +#[pyclass(name = "ReplicaReplicationMetrics")] +#[derive(Debug, Clone)] +pub struct ReplicaReplicationMetrics { + #[pyo3(get)] + pub applied_epoch: i64, + #[pyo3(get)] + pub applied_log_index: i64, + #[pyo3(get)] + pub needs_reseed: bool, + #[pyo3(get)] + pub last_error: Option, +} + +#[pymethods] +impl ReplicaReplicationMetrics { + fn __repr__(&self) -> String { + format!( + "ReplicaReplicationMetrics(epoch={}, applied_log_index={}, needs_reseed={})", + self.applied_epoch, self.applied_log_index, self.needs_reseed + ) + } +} + +impl From for ReplicaReplicationMetrics { + fn from(metrics: core_metrics::ReplicaReplicationMetrics) -> Self { + ReplicaReplicationMetrics { + applied_epoch: metrics.applied_epoch, + applied_log_index: metrics.applied_log_index, + needs_reseed: metrics.needs_reseed, + last_error: metrics.last_error, + } + } +} + +/// Replication metrics +#[pyclass(name = "ReplicationMetrics")] +#[derive(Debug, Clone)] +pub struct ReplicationMetrics { + #[pyo3(get)] + pub enabled: bool, + #[pyo3(get)] + pub role: String, + #[pyo3(get)] + pub primary: Option, + #[pyo3(get)] + pub replica: Option, +} + +#[pymethods] 
+impl ReplicationMetrics { + fn __repr__(&self) -> String { + format!( + "ReplicationMetrics(enabled={}, role='{}')", + self.enabled, self.role + ) + } +} + +impl From for ReplicationMetrics { + fn from(metrics: core_metrics::ReplicationMetrics) -> Self { + ReplicationMetrics { + enabled: metrics.enabled, + role: metrics.role, + primary: metrics.primary.map(Into::into), + replica: metrics.replica.map(Into::into), + } + } +} + /// MVCC stats (from stats()) #[pyclass(name = "MvccStats")] #[derive(Debug, Clone)] @@ -286,6 +415,8 @@ pub struct DatabaseMetrics { #[pyo3(get)] pub mvcc: Option, #[pyo3(get)] + pub replication: ReplicationMetrics, + #[pyo3(get)] pub memory: MemoryMetrics, #[pyo3(get)] pub collected_at: i64, @@ -313,6 +444,7 @@ impl From for DatabaseMetrics { data: metrics.data.into(), cache: metrics.cache.into(), mvcc: metrics.mvcc.map(Into::into), + replication: metrics.replication.into(), memory: metrics.memory.into(), collected_at: metrics.collected_at_ms, } diff --git a/ray-rs/src/pyo3_bindings/stats/mod.rs b/ray-rs/src/pyo3_bindings/stats/mod.rs index 6038bbf..d2025c6 100644 --- a/ray-rs/src/pyo3_bindings/stats/mod.rs +++ b/ray-rs/src/pyo3_bindings/stats/mod.rs @@ -14,5 +14,6 @@ pub mod metrics; pub use database::{CacheStats, CheckResult, DbStats}; pub use metrics::{ CacheLayerMetrics, CacheMetrics, DataMetrics, DatabaseMetrics, HealthCheckEntry, - HealthCheckResult, MemoryMetrics, MvccMetrics, MvccStats, + HealthCheckResult, MemoryMetrics, MvccMetrics, MvccStats, PrimaryReplicationMetrics, + ReplicaReplicationMetrics, ReplicationMetrics, }; diff --git a/ray-rs/src/replication/log_store.rs b/ray-rs/src/replication/log_store.rs new file mode 100644 index 0000000..48b4962 --- /dev/null +++ b/ray-rs/src/replication/log_store.rs @@ -0,0 +1,540 @@ +//! Replication segment log storage. 
+ +use crate::error::{KiteError, Result}; +use crate::util::crc::{crc32c, crc32c_multi}; +use byteorder::{LittleEndian, ReadBytesExt}; +use std::fs::{self, File, OpenOptions}; +use std::io::{self, BufReader, Read, Seek, SeekFrom, Write}; +use std::path::{Path, PathBuf}; + +const FRAME_MAGIC: u32 = 0x474F_4C52; // "RLOG" in little-endian u32 +const FRAME_VERSION: u16 = 1; +const FRAME_FLAG_CRC32_DISABLED: u16 = 0x0001; +const FRAME_HEADER_SIZE: usize = std::mem::size_of::() + + std::mem::size_of::() + + std::mem::size_of::() + + std::mem::size_of::() + + std::mem::size_of::() + + std::mem::size_of::() + + std::mem::size_of::(); +const MAX_FRAME_PAYLOAD_BYTES: usize = 64 * 1024 * 1024; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ReplicationFrame { + pub epoch: u64, + pub log_index: u64, + pub payload: Vec, +} + +impl ReplicationFrame { + pub fn new(epoch: u64, log_index: u64, payload: Vec) -> Self { + Self { + epoch, + log_index, + payload, + } + } +} + +#[derive(Debug)] +pub struct SegmentLogStore { + path: PathBuf, + file: File, + write_buffer: Vec, + write_chunks: Vec>, + queued_bytes: usize, + write_buffer_limit: usize, + writable: bool, +} + +impl SegmentLogStore { + pub fn create(path: impl AsRef) -> Result { + let path = path.as_ref().to_path_buf(); + + if let Some(parent) = path.parent() { + fs::create_dir_all(parent)?; + } + + let file = OpenOptions::new() + .create(true) + .truncate(true) + .read(true) + .write(true) + .open(&path)?; + + Ok(Self { + path, + file, + write_buffer: Vec::new(), + write_chunks: Vec::new(), + queued_bytes: 0, + write_buffer_limit: 0, + writable: true, + }) + } + + pub fn open(path: impl AsRef) -> Result { + let path = path.as_ref().to_path_buf(); + let file = OpenOptions::new().read(true).open(&path)?; + + Ok(Self { + path, + file, + write_buffer: Vec::new(), + write_chunks: Vec::new(), + queued_bytes: 0, + write_buffer_limit: 0, + writable: false, + }) + } + + pub fn open_or_create_append(path: impl AsRef) -> Result { 
+ Self::open_or_create_append_with_buffer(path, 0) + } + + pub fn open_or_create_append_with_buffer( + path: impl AsRef, + write_buffer_limit: usize, + ) -> Result { + let path = path.as_ref().to_path_buf(); + + if let Some(parent) = path.parent() { + fs::create_dir_all(parent)?; + } + + let file = OpenOptions::new() + .create(true) + .read(true) + .append(true) + .open(&path)?; + + Ok(Self { + path, + file, + write_buffer: Vec::with_capacity(write_buffer_limit), + write_chunks: Vec::new(), + queued_bytes: 0, + write_buffer_limit, + writable: true, + }) + } + + pub fn append(&mut self, frame: &ReplicationFrame) -> Result<()> { + self.append_payload_segments_with_crc( + frame.epoch, + frame.log_index, + &[frame.payload.as_slice()], + true, + )?; + Ok(()) + } + + pub fn append_payload_segments( + &mut self, + epoch: u64, + log_index: u64, + payload_segments: &[&[u8]], + ) -> Result { + self.append_payload_segments_with_crc(epoch, log_index, payload_segments, true) + } + + pub fn append_payload_segments_with_crc( + &mut self, + epoch: u64, + log_index: u64, + payload_segments: &[&[u8]], + with_crc: bool, + ) -> Result { + if !self.writable { + return Err(KiteError::InvalidReplication( + "cannot append to read-only segment log store".to_string(), + )); + } + + let payload_len = payload_segments.iter().try_fold(0usize, |acc, segment| { + acc + .checked_add(segment.len()) + .ok_or_else(|| KiteError::InvalidReplication("frame payload too large".to_string())) + })?; + + if payload_len > MAX_FRAME_PAYLOAD_BYTES { + return Err(KiteError::InvalidReplication(format!( + "frame payload too large: {} bytes", + payload_len + ))); + } + + let payload_len_u32 = u32::try_from(payload_len).map_err(|_| { + KiteError::InvalidReplication(format!("payload length does not fit u32: {}", payload_len)) + })?; + + let flags = if with_crc { + 0 + } else { + FRAME_FLAG_CRC32_DISABLED + }; + let crc32 = if with_crc { + crc32c_multi(payload_segments) + } else { + 0 + }; + + let mut header = [0u8; 
FRAME_HEADER_SIZE]; + header[0..4].copy_from_slice(&FRAME_MAGIC.to_le_bytes()); + header[4..6].copy_from_slice(&FRAME_VERSION.to_le_bytes()); + header[6..8].copy_from_slice(&flags.to_le_bytes()); + header[8..16].copy_from_slice(&epoch.to_le_bytes()); + header[16..24].copy_from_slice(&log_index.to_le_bytes()); + header[24..28].copy_from_slice(&payload_len_u32.to_le_bytes()); + header[28..32].copy_from_slice(&crc32.to_le_bytes()); + if self.write_buffer_limit > 0 { + self.write_buffer.extend_from_slice(&header); + for segment in payload_segments { + self.write_buffer.extend_from_slice(segment); + } + if self.write_buffer.len().saturating_add(self.queued_bytes) >= self.write_buffer_limit { + self.flush()?; + } + } else { + self.file.write_all(&header)?; + for segment in payload_segments { + self.file.write_all(segment)?; + } + } + + Ok(FRAME_HEADER_SIZE as u64 + payload_len as u64) + } + + pub fn append_payload_owned_segments_with_crc( + &mut self, + epoch: u64, + log_index: u64, + mut payload_segments: Vec>, + with_crc: bool, + ) -> Result { + if !self.writable { + return Err(KiteError::InvalidReplication( + "cannot append to read-only segment log store".to_string(), + )); + } + + let payload_len = payload_segments.iter().try_fold(0usize, |acc, segment| { + acc + .checked_add(segment.len()) + .ok_or_else(|| KiteError::InvalidReplication("frame payload too large".to_string())) + })?; + + if payload_len > MAX_FRAME_PAYLOAD_BYTES { + return Err(KiteError::InvalidReplication(format!( + "frame payload too large: {} bytes", + payload_len + ))); + } + + let payload_len_u32 = u32::try_from(payload_len).map_err(|_| { + KiteError::InvalidReplication(format!("payload length does not fit u32: {}", payload_len)) + })?; + + let flags = if with_crc { + 0 + } else { + FRAME_FLAG_CRC32_DISABLED + }; + let crc32 = if with_crc { + let refs: Vec<&[u8]> = payload_segments + .iter() + .map(|segment| segment.as_slice()) + .collect(); + crc32c_multi(&refs) + } else { + 0 + }; + + let mut 
header = [0u8; FRAME_HEADER_SIZE]; + header[0..4].copy_from_slice(&FRAME_MAGIC.to_le_bytes()); + header[4..6].copy_from_slice(&FRAME_VERSION.to_le_bytes()); + header[6..8].copy_from_slice(&flags.to_le_bytes()); + header[8..16].copy_from_slice(&epoch.to_le_bytes()); + header[16..24].copy_from_slice(&log_index.to_le_bytes()); + header[24..28].copy_from_slice(&payload_len_u32.to_le_bytes()); + header[28..32].copy_from_slice(&crc32.to_le_bytes()); + + if self.write_buffer_limit > 0 { + self.write_chunks.push(header.to_vec()); + self.queued_bytes = self.queued_bytes.saturating_add(FRAME_HEADER_SIZE); + for segment in payload_segments.drain(..) { + self.queued_bytes = self.queued_bytes.saturating_add(segment.len()); + self.write_chunks.push(segment); + } + if self.write_buffer.len().saturating_add(self.queued_bytes) >= self.write_buffer_limit { + self.flush()?; + } + } else { + self.file.write_all(&header)?; + for segment in payload_segments { + self.file.write_all(&segment)?; + } + } + + Ok(FRAME_HEADER_SIZE as u64 + payload_len as u64) + } + + pub fn file_len(&self) -> Result { + let metadata = self.file.metadata()?; + Ok( + metadata + .len() + .saturating_add(self.write_buffer.len() as u64) + .saturating_add(self.queued_bytes as u64), + ) + } + + pub fn flush(&mut self) -> Result<()> { + if !self.writable { + return Ok(()); + } + + if self.write_buffer.is_empty() && self.write_chunks.is_empty() { + return Ok(()); + } + + if !self.write_buffer.is_empty() { + self.file.write_all(&self.write_buffer)?; + self.write_buffer.clear(); + } + for chunk in &self.write_chunks { + self.file.write_all(chunk)?; + } + self.write_chunks.clear(); + self.queued_bytes = 0; + Ok(()) + } + + pub fn sync(&mut self) -> Result<()> { + if self.writable { + self.flush()?; + self.file.sync_all()?; + } + + Ok(()) + } + + pub fn read_all(&self) -> Result> { + let file = OpenOptions::new().read(true).open(&self.path)?; + let mut reader = BufReader::new(file); + let mut frames = Vec::new(); + + 
while let Some(frame) = read_frame(&mut reader)? { + frames.push(frame); + } + + Ok(frames) + } + + pub fn read_filtered( + &self, + mut include: impl FnMut(&ReplicationFrame) -> bool, + max_frames: usize, + ) -> Result> { + let file = OpenOptions::new().read(true).open(&self.path)?; + let mut reader = BufReader::new(file); + let mut frames = Vec::new(); + + while let Some(frame) = read_frame(&mut reader)? { + if include(&frame) { + frames.push(frame); + if max_frames > 0 && frames.len() >= max_frames { + break; + } + } + } + + Ok(frames) + } + + pub fn read_filtered_from_offset( + &self, + start_offset: u64, + mut include: impl FnMut(&ReplicationFrame) -> bool, + max_frames: usize, + ) -> Result<(Vec, u64, Option<(u64, u64)>)> { + let mut file = OpenOptions::new().read(true).open(&self.path)?; + let file_len = file.metadata()?.len(); + let clamped_start = start_offset.min(file_len); + file.seek(SeekFrom::Start(clamped_start))?; + let mut reader = BufReader::new(file); + let mut frames = Vec::new(); + let mut last_seen = None; + + while let Some(frame) = read_frame(&mut reader)? 
{ + last_seen = Some((frame.epoch, frame.log_index)); + if include(&frame) { + frames.push(frame); + if max_frames > 0 && frames.len() >= max_frames { + break; + } + } + } + + let next_offset = reader.stream_position()?; + Ok((frames, next_offset, last_seen)) + } +} + +impl Drop for SegmentLogStore { + fn drop(&mut self) { + let _ = self.flush(); + } +} + +fn read_frame(reader: &mut impl Read) -> Result> { + let magic = match reader.read_u32::() { + Ok(value) => value, + Err(error) if error.kind() == io::ErrorKind::UnexpectedEof => return Ok(None), + Err(error) => return Err(KiteError::Io(error)), + }; + + if magic != FRAME_MAGIC { + return Err(KiteError::InvalidWal(format!( + "invalid replication frame magic: 0x{magic:08X}" + ))); + } + + let version = read_u16_checked(reader, "version")?; + let flags = read_u16_checked(reader, "reserved")?; + let epoch = read_u64_checked(reader, "epoch")?; + let log_index = read_u64_checked(reader, "log_index")?; + let payload_len = read_u32_checked(reader, "payload_len")?; + let stored_crc32 = read_u32_checked(reader, "payload_crc32")?; + + if version != FRAME_VERSION { + return Err(KiteError::VersionMismatch { + required: version as u32, + current: FRAME_VERSION as u32, + }); + } + + if flags & !FRAME_FLAG_CRC32_DISABLED != 0 { + return Err(KiteError::InvalidWal(format!( + "unsupported replication frame flags: 0x{flags:04X}" + ))); + } + + let crc_disabled = (flags & FRAME_FLAG_CRC32_DISABLED) != 0; + let payload_len = payload_len as usize; + if payload_len > MAX_FRAME_PAYLOAD_BYTES { + return Err(KiteError::InvalidWal(format!( + "frame payload exceeds limit: {payload_len}" + ))); + } + + let mut payload = vec![0; payload_len]; + reader + .read_exact(&mut payload) + .map_err(|error| map_unexpected_eof(error, "payload"))?; + + if !crc_disabled { + let computed_crc32 = crc32c(&payload); + if computed_crc32 != stored_crc32 { + return Err(KiteError::CrcMismatch { + stored: stored_crc32, + computed: computed_crc32, + }); + } + } + + 
Ok(Some(ReplicationFrame::new(epoch, log_index, payload))) +} + +fn read_u16_checked(reader: &mut impl Read, field: &'static str) -> Result { + reader + .read_u16::() + .map_err(|error| map_unexpected_eof(error, field)) +} + +fn read_u32_checked(reader: &mut impl Read, field: &'static str) -> Result { + reader + .read_u32::() + .map_err(|error| map_unexpected_eof(error, field)) +} + +fn read_u64_checked(reader: &mut impl Read, field: &'static str) -> Result { + reader + .read_u64::() + .map_err(|error| map_unexpected_eof(error, field)) +} + +fn map_unexpected_eof(error: io::Error, field: &'static str) -> KiteError { + if error.kind() == io::ErrorKind::UnexpectedEof { + KiteError::InvalidWal(format!( + "truncated replication segment while reading {field}" + )) + } else { + KiteError::Io(error) + } +} + +#[cfg(test)] +mod tests { + use super::{ReplicationFrame, SegmentLogStore, FRAME_HEADER_SIZE}; + + #[test] + fn append_then_scan_roundtrip() { + let dir = tempfile::tempdir().expect("tempdir"); + let path = dir.path().join("segment.rlog"); + + let mut store = SegmentLogStore::create(&path).expect("create"); + store + .append(&ReplicationFrame::new(1, 1, b"hello".to_vec())) + .expect("append"); + store + .append(&ReplicationFrame::new(1, 2, b"world".to_vec())) + .expect("append"); + store.sync().expect("sync"); + + let reader = SegmentLogStore::open(&path).expect("open"); + let frames = reader.read_all().expect("read"); + + assert_eq!(frames.len(), 2); + assert_eq!(frames[0].payload, b"hello"); + assert_eq!(frames[1].payload, b"world"); + } + + #[test] + fn append_payload_segments_roundtrip() { + let dir = tempfile::tempdir().expect("tempdir"); + let path = dir.path().join("segment-segmented.rlog"); + + let mut store = SegmentLogStore::create(&path).expect("create"); + store + .append_payload_segments(3, 9, &[b"hello", b"-", b"world"]) + .expect("append"); + store.sync().expect("sync"); + + let reader = SegmentLogStore::open(&path).expect("open"); + let frames = 
reader.read_all().expect("read"); + assert_eq!(frames.len(), 1); + assert_eq!(frames[0].epoch, 3); + assert_eq!(frames[0].log_index, 9); + assert_eq!(frames[0].payload, b"hello-world"); + } + + #[test] + fn truncated_frame_header_fails() { + let dir = tempfile::tempdir().expect("tempdir"); + let path = dir.path().join("segment.rlog"); + + let mut store = SegmentLogStore::create(&path).expect("create"); + store + .append(&ReplicationFrame::new(1, 1, b"abc".to_vec())) + .expect("append"); + store.sync().expect("sync"); + + let mut bytes = std::fs::read(&path).expect("read bytes"); + bytes.truncate(FRAME_HEADER_SIZE - 1); + std::fs::write(&path, bytes).expect("write truncated"); + + let reader = SegmentLogStore::open(&path).expect("open"); + assert!(reader.read_all().is_err()); + } +} diff --git a/ray-rs/src/replication/manifest.rs b/ray-rs/src/replication/manifest.rs new file mode 100644 index 0000000..67fb01e --- /dev/null +++ b/ray-rs/src/replication/manifest.rs @@ -0,0 +1,214 @@ +//! Replication manifest sidecar storage. 
+ +use crate::error::{KiteError, Result}; +use crate::util::crc::crc32c; +use serde::{Deserialize, Serialize}; +use std::fs::{self, File, OpenOptions}; +use std::io::Write; +use std::path::{Path, PathBuf}; + +pub const MANIFEST_ENVELOPE_VERSION: u32 = 1; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct SegmentMeta { + pub id: u64, + pub start_log_index: u64, + pub end_log_index: u64, + pub size_bytes: u64, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ReplicationManifest { + pub version: u32, + pub epoch: u64, + pub head_log_index: u64, + pub retained_floor: u64, + pub active_segment_id: u64, + pub segments: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +struct ManifestEnvelope { + pub version: u32, + pub payload_crc32: u32, + pub manifest: ReplicationManifest, +} + +#[derive(Debug, Clone)] +pub struct ManifestStore { + path: PathBuf, +} + +impl ManifestStore { + pub fn new(path: impl AsRef) -> Self { + Self { + path: path.as_ref().to_path_buf(), + } + } + + pub fn path(&self) -> &Path { + &self.path + } + + pub fn temp_path(&self) -> PathBuf { + match self + .path + .extension() + .and_then(|extension| extension.to_str()) + { + Some(extension) => self.path.with_extension(format!("{extension}.tmp")), + None => self.path.with_extension("tmp"), + } + } + + pub fn read(&self) -> Result { + let bytes = fs::read(&self.path)?; + decode_manifest_bytes(&bytes) + } + + pub fn write(&self, manifest: &ReplicationManifest) -> Result<()> { + if let Some(parent) = self.path.parent() { + fs::create_dir_all(parent)?; + } + + let temp_path = self.temp_path(); + let bytes = encode_manifest_bytes(manifest)?; + + let mut temp_file = OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(&temp_path)?; + + temp_file.write_all(&bytes)?; + temp_file.sync_all()?; + + fs::rename(&temp_path, &self.path)?; + sync_parent_dir(self.path.parent())?; + + Ok(()) + } +} + +fn 
encode_manifest_bytes(manifest: &ReplicationManifest) -> Result> { + let payload = serde_json::to_vec(manifest).map_err(|error| { + KiteError::Serialization(format!("encode replication manifest payload: {error}")) + })?; + + let envelope = ManifestEnvelope { + version: MANIFEST_ENVELOPE_VERSION, + payload_crc32: crc32c(&payload), + manifest: manifest.clone(), + }; + + serde_json::to_vec(&envelope).map_err(|error| { + KiteError::Serialization(format!("encode replication manifest envelope: {error}")) + }) +} + +fn decode_manifest_bytes(bytes: &[u8]) -> Result { + let envelope: ManifestEnvelope = serde_json::from_slice(bytes).map_err(|error| { + KiteError::Serialization(format!("decode replication manifest envelope: {error}")) + })?; + + if envelope.version != MANIFEST_ENVELOPE_VERSION { + return Err(KiteError::VersionMismatch { + required: envelope.version, + current: MANIFEST_ENVELOPE_VERSION, + }); + } + + let payload = serde_json::to_vec(&envelope.manifest).map_err(|error| { + KiteError::Serialization(format!("encode replication manifest payload: {error}")) + })?; + + let computed = crc32c(&payload); + if computed != envelope.payload_crc32 { + return Err(KiteError::CrcMismatch { + stored: envelope.payload_crc32, + computed, + }); + } + + Ok(envelope.manifest) +} + +fn sync_parent_dir(parent: Option<&Path>) -> Result<()> { + #[cfg(unix)] + { + if let Some(parent) = parent { + let directory = File::open(parent)?; + directory.sync_all()?; + } + } + + #[cfg(not(unix))] + { + let _ = parent; + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::{ManifestEnvelope, ManifestStore, ReplicationManifest, SegmentMeta}; + + fn sample_manifest() -> ReplicationManifest { + ReplicationManifest { + version: 1, + epoch: 7, + head_log_index: 99, + retained_floor: 42, + active_segment_id: 3, + segments: vec![ + SegmentMeta { + id: 2, + start_log_index: 1, + end_log_index: 64, + size_bytes: 1024, + }, + SegmentMeta { + id: 3, + start_log_index: 65, + end_log_index: 99, + 
size_bytes: 512, + }, + ], + } + } + + #[test] + fn write_then_read_roundtrip() { + let dir = tempfile::tempdir().expect("tempdir"); + let path = dir.path().join("manifest.json"); + let store = ManifestStore::new(path); + + let manifest = sample_manifest(); + store.write(&manifest).expect("write"); + + let loaded = store.read().expect("read"); + assert_eq!(loaded, manifest); + } + + #[test] + fn checksum_mismatch_fails_read() { + let dir = tempfile::tempdir().expect("tempdir"); + let path = dir.path().join("manifest.json"); + let store = ManifestStore::new(&path); + + let manifest = sample_manifest(); + store.write(&manifest).expect("write"); + + let mut envelope: ManifestEnvelope = + serde_json::from_slice(&std::fs::read(&path).expect("read bytes")).expect("parse envelope"); + envelope.payload_crc32 ^= 0xFF; + std::fs::write( + &path, + serde_json::to_vec(&envelope).expect("encode envelope"), + ) + .expect("write envelope"); + + assert!(store.read().is_err()); + } +} diff --git a/ray-rs/src/replication/mod.rs b/ray-rs/src/replication/mod.rs new file mode 100644 index 0000000..a671923 --- /dev/null +++ b/ray-rs/src/replication/mod.rs @@ -0,0 +1,17 @@ +//! Replication primitives and runtime wiring. +//! +//! Phase A focuses on deterministic token/cursor parsing and durable sidecar +//! storage primitives. + +pub mod log_store; +pub mod manifest; +pub mod primary; +pub mod progress; +pub mod replica; +pub mod token; +pub mod transport; +pub mod types; + +pub use primary::PrimaryReplicationStatus; +pub use replica::ReplicaReplicationStatus; +pub use types::{CommitToken, ReplicationCursor, ReplicationRole}; diff --git a/ray-rs/src/replication/primary.rs b/ray-rs/src/replication/primary.rs new file mode 100644 index 0000000..20df260 --- /dev/null +++ b/ray-rs/src/replication/primary.rs @@ -0,0 +1,917 @@ +//! Primary-side replication orchestration. 
+ +use super::log_store::SegmentLogStore; +use super::manifest::{ManifestStore, ReplicationManifest, SegmentMeta, MANIFEST_ENVELOPE_VERSION}; +use super::progress::{ + clear_replica_progress, load_replica_progress, upsert_replica_progress, + ReplicaProgress as ReplicaProgressEntry, +}; +use super::transport::build_commit_payload_header; +use super::types::{CommitToken, ReplicationRole}; +use crate::core::single_file::SyncMode; +use crate::error::{KiteError, Result}; +use fs2::FileExt; +use parking_lot::Mutex; +use std::collections::HashMap; +use std::fs::{File, OpenOptions}; +use std::io::ErrorKind; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::{Arc, Mutex as StdMutex, OnceLock, Weak}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +const MANIFEST_FILE_NAME: &str = "manifest.json"; +const PRIMARY_LOCK_FILE_NAME: &str = "primary.lock"; +const DEFAULT_SEGMENT_MAX_BYTES: u64 = 64 * 1024 * 1024; +const DEFAULT_RETENTION_MIN_ENTRIES: u64 = 1024; +const DEFAULT_MANIFEST_REFRESH_APPEND_INTERVAL: u64 = 256; +const DEFAULT_APPEND_WRITE_BUFFER_BYTES: usize = 16 * 1024 * 1024; + +type SidecarOpLock = Arc>; +type SidecarPrimaryLock = Arc; +type SidecarEpochFence = Arc; + +static SIDECAR_LOCKS: OnceLock>> = OnceLock::new(); +static SIDECAR_PRIMARY_LOCKS: OnceLock< + StdMutex>>, +> = OnceLock::new(); +static SIDECAR_EPOCH_FENCES: OnceLock>>> = + OnceLock::new(); + +#[derive(Debug, Clone)] +pub struct PrimaryReplicationStatus { + pub role: ReplicationRole, + pub epoch: u64, + pub head_log_index: u64, + pub retained_floor: u64, + pub replica_lags: Vec, + pub sidecar_path: PathBuf, + pub last_token: Option, + pub append_attempts: u64, + pub append_failures: u64, + pub append_successes: u64, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ReplicaLagStatus { + pub replica_id: String, + pub epoch: u64, + pub applied_log_index: u64, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct PrimaryRetentionOutcome { 
+ pub pruned_segments: usize, + pub retained_floor: u64, +} + +#[derive(Debug)] +struct PrimarySidecarProcessLock { + _file: File, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +struct ManifestDiskStamp { + len: u64, + modified_unix_nanos: Option, +} + +#[derive(Debug)] +struct PrimaryReplicationState { + manifest: ReplicationManifest, + manifest_disk_stamp: ManifestDiskStamp, + log_store: SegmentLogStore, + active_segment_size_bytes: u64, + last_token: Option, + replica_progress: HashMap, + write_fenced: bool, + appends_since_manifest_refresh: u64, +} + +#[derive(Debug)] +pub struct PrimaryReplication { + sidecar_path: PathBuf, + manifest_store: ManifestStore, + state: Mutex, + append_attempts: AtomicU64, + append_failures: AtomicU64, + append_successes: AtomicU64, + segment_max_bytes: u64, + retention_min_entries: u64, + retention_min_duration: Option, + durable_append: bool, + checksum_payload: bool, + persist_manifest_each_append: bool, + manifest_refresh_append_interval: u64, + append_write_buffer_bytes: usize, + fail_after_append_for_testing: Option, + sidecar_op_lock: SidecarOpLock, + _sidecar_primary_lock: SidecarPrimaryLock, + epoch_fence: SidecarEpochFence, +} + +impl PrimaryReplication { + pub fn open( + db_path: &Path, + sidecar_path: Option, + segment_max_bytes: Option, + retention_min_entries: Option, + retention_min_ms: Option, + sync_mode: SyncMode, + fail_after_append_for_testing: Option, + ) -> Result { + let sidecar_path = + sidecar_path.unwrap_or_else(|| default_replication_sidecar_path(db_path.as_ref())); + std::fs::create_dir_all(&sidecar_path)?; + let sidecar_primary_lock = acquire_sidecar_primary_lock(&sidecar_path)?; + + let manifest_store = ManifestStore::new(sidecar_path.join(MANIFEST_FILE_NAME)); + + let mut manifest = if manifest_store.path().exists() { + manifest_store.read()? 
+ } else { + let initial = ReplicationManifest { + version: MANIFEST_ENVELOPE_VERSION, + epoch: 1, + head_log_index: 0, + retained_floor: 0, + active_segment_id: 1, + segments: vec![SegmentMeta { + id: 1, + start_log_index: 1, + end_log_index: 0, + size_bytes: 0, + }], + }; + manifest_store.write(&initial)?; + initial + }; + + ensure_active_segment_metadata(&mut manifest); + if reconcile_manifest_head_from_active_segment(&sidecar_path, &mut manifest)? { + // Recover append state when manifest head lagged a flushed segment tail. + manifest_store.write(&manifest)?; + } + + let segment_path = sidecar_path.join(segment_file_name(manifest.active_segment_id)); + let active_segment_size_bytes = segment_file_len(&segment_path)?; + let append_write_buffer_bytes = if matches!(sync_mode, SyncMode::Full) { + 0 + } else { + DEFAULT_APPEND_WRITE_BUFFER_BYTES + }; + let log_store = + SegmentLogStore::open_or_create_append_with_buffer(&segment_path, append_write_buffer_bytes)?; + let manifest_disk_stamp = read_manifest_disk_stamp(manifest_store.path())?; + let replica_progress = load_replica_progress(&sidecar_path)?; + + let sidecar_op_lock = sidecar_operation_lock(&sidecar_path); + let epoch_fence = sidecar_epoch_fence(&sidecar_path, manifest.epoch); + + Ok(Self { + sidecar_path, + manifest_store, + state: Mutex::new(PrimaryReplicationState { + manifest, + manifest_disk_stamp, + log_store, + active_segment_size_bytes, + last_token: None, + replica_progress, + write_fenced: false, + appends_since_manifest_refresh: 0, + }), + append_attempts: AtomicU64::new(0), + append_failures: AtomicU64::new(0), + append_successes: AtomicU64::new(0), + segment_max_bytes: segment_max_bytes + .unwrap_or(DEFAULT_SEGMENT_MAX_BYTES) + .max(1), + retention_min_entries: retention_min_entries.unwrap_or(DEFAULT_RETENTION_MIN_ENTRIES), + retention_min_duration: retention_min_ms.map(Duration::from_millis), + durable_append: matches!(sync_mode, SyncMode::Full), + checksum_payload: matches!(sync_mode, 
SyncMode::Full), + persist_manifest_each_append: matches!(sync_mode, SyncMode::Full), + manifest_refresh_append_interval: if matches!(sync_mode, SyncMode::Full) { + 1 + } else { + DEFAULT_MANIFEST_REFRESH_APPEND_INTERVAL + }, + append_write_buffer_bytes, + fail_after_append_for_testing, + sidecar_op_lock, + _sidecar_primary_lock: sidecar_primary_lock, + epoch_fence, + }) + } + + pub fn append_commit_frame(&self, payload: Vec) -> Result { + self.append_commit_payload_segments(&[payload.as_slice()]) + } + + pub fn append_commit_wal_frame(&self, txid: u64, wal_bytes: Vec) -> Result { + let header = build_commit_payload_header(txid, wal_bytes.len())?; + self.append_commit_payload_owned_segments(vec![header.to_vec(), wal_bytes]) + } + + fn append_commit_payload_segments(&self, payload_segments: &[&[u8]]) -> Result { + self.append_attempts.fetch_add(1, Ordering::Relaxed); + + if let Some(limit) = self.fail_after_append_for_testing { + let successes = self.append_successes.load(Ordering::Relaxed); + if successes >= limit { + self.append_failures.fetch_add(1, Ordering::Relaxed); + return Err(KiteError::InvalidReplication( + "replication append failure injected for testing".to_string(), + )); + } + } + + let _sidecar_guard = self.sidecar_op_lock.lock(); + let mut state = self.state.lock(); + let fenced_epoch = self.epoch_fence.load(Ordering::Acquire); + if fenced_epoch > state.manifest.epoch { + state.write_fenced = true; + self.append_failures.fetch_add(1, Ordering::Relaxed); + return Err(stale_primary_error()); + } + if state.write_fenced { + self.append_failures.fetch_add(1, Ordering::Relaxed); + return Err(stale_primary_error()); + } + let should_refresh = state.appends_since_manifest_refresh + >= self.manifest_refresh_append_interval.saturating_sub(1); + if should_refresh { + let epoch_changed = self.refresh_manifest_locked(&mut state)?; + state.appends_since_manifest_refresh = 0; + if epoch_changed || state.write_fenced { + self.append_failures.fetch_add(1, 
Ordering::Relaxed); + return Err(stale_primary_error()); + } + } + + let epoch = state.manifest.epoch; + let next_log_index = state.manifest.head_log_index.saturating_add(1); + + let frame_size = match state.log_store.append_payload_segments_with_crc( + epoch, + next_log_index, + payload_segments, + self.checksum_payload, + ) { + Ok(size) => size, + Err(error) => { + self.append_failures.fetch_add(1, Ordering::Relaxed); + return Err(error); + } + }; + + if self.durable_append { + if let Err(error) = state.log_store.sync() { + self.append_failures.fetch_add(1, Ordering::Relaxed); + return Err(error); + } + } + + let mut next_manifest = state.manifest.clone(); + next_manifest.head_log_index = next_log_index; + + ensure_active_segment_metadata(&mut next_manifest); + state.active_segment_size_bytes = state.active_segment_size_bytes.saturating_add(frame_size); + let size_bytes = state.active_segment_size_bytes; + + if let Some(meta) = next_manifest + .segments + .iter_mut() + .find(|entry| entry.id == next_manifest.active_segment_id) + { + if meta.end_log_index < meta.start_log_index { + meta.start_log_index = next_log_index; + } + meta.end_log_index = next_log_index; + meta.size_bytes = size_bytes; + } + + let mut rotated = false; + if size_bytes >= self.segment_max_bytes { + rotated = true; + next_manifest.active_segment_id = next_manifest.active_segment_id.saturating_add(1); + let start = next_log_index.saturating_add(1); + next_manifest.segments.push(SegmentMeta { + id: next_manifest.active_segment_id, + start_log_index: start, + end_log_index: start.saturating_sub(1), + size_bytes: 0, + }); + } + + let persist_manifest = self.persist_manifest_each_append || rotated || should_refresh; + if persist_manifest || rotated { + if let Err(error) = state.log_store.flush() { + self.append_failures.fetch_add(1, Ordering::Relaxed); + return Err(error); + } + } + if persist_manifest { + if let Err(error) = self.manifest_store.write(&next_manifest) { + 
self.append_failures.fetch_add(1, Ordering::Relaxed); + return Err(error); + } + state.manifest_disk_stamp = read_manifest_disk_stamp(self.manifest_store.path())?; + } + + let token = CommitToken::new(epoch, next_log_index); + if rotated { + state.log_store = SegmentLogStore::open_or_create_append_with_buffer( + self + .sidecar_path + .join(segment_file_name(next_manifest.active_segment_id)), + self.append_write_buffer_bytes, + )?; + state.active_segment_size_bytes = 0; + } + state.manifest = next_manifest; + state.last_token = Some(token); + state.appends_since_manifest_refresh = state.appends_since_manifest_refresh.saturating_add(1); + self.append_successes.fetch_add(1, Ordering::Relaxed); + self + .epoch_fence + .store(state.manifest.epoch, Ordering::Release); + + Ok(token) + } + + fn append_commit_payload_owned_segments( + &self, + payload_segments: Vec>, + ) -> Result { + self.append_attempts.fetch_add(1, Ordering::Relaxed); + + if let Some(limit) = self.fail_after_append_for_testing { + let successes = self.append_successes.load(Ordering::Relaxed); + if successes >= limit { + self.append_failures.fetch_add(1, Ordering::Relaxed); + return Err(KiteError::InvalidReplication( + "replication append failure injected for testing".to_string(), + )); + } + } + + let _sidecar_guard = self.sidecar_op_lock.lock(); + let mut state = self.state.lock(); + let fenced_epoch = self.epoch_fence.load(Ordering::Acquire); + if fenced_epoch > state.manifest.epoch { + state.write_fenced = true; + self.append_failures.fetch_add(1, Ordering::Relaxed); + return Err(stale_primary_error()); + } + if state.write_fenced { + self.append_failures.fetch_add(1, Ordering::Relaxed); + return Err(stale_primary_error()); + } + let should_refresh = state.appends_since_manifest_refresh + >= self.manifest_refresh_append_interval.saturating_sub(1); + if should_refresh { + let epoch_changed = self.refresh_manifest_locked(&mut state)?; + state.appends_since_manifest_refresh = 0; + if epoch_changed || 
state.write_fenced { + self.append_failures.fetch_add(1, Ordering::Relaxed); + return Err(stale_primary_error()); + } + } + + let epoch = state.manifest.epoch; + let next_log_index = state.manifest.head_log_index.saturating_add(1); + + let frame_size = match state.log_store.append_payload_owned_segments_with_crc( + epoch, + next_log_index, + payload_segments, + self.checksum_payload, + ) { + Ok(size) => size, + Err(error) => { + self.append_failures.fetch_add(1, Ordering::Relaxed); + return Err(error); + } + }; + + if self.durable_append { + if let Err(error) = state.log_store.sync() { + self.append_failures.fetch_add(1, Ordering::Relaxed); + return Err(error); + } + } + + let mut next_manifest = state.manifest.clone(); + next_manifest.head_log_index = next_log_index; + + ensure_active_segment_metadata(&mut next_manifest); + state.active_segment_size_bytes = state.active_segment_size_bytes.saturating_add(frame_size); + let size_bytes = state.active_segment_size_bytes; + + if let Some(meta) = next_manifest + .segments + .iter_mut() + .find(|entry| entry.id == next_manifest.active_segment_id) + { + if meta.end_log_index < meta.start_log_index { + meta.start_log_index = next_log_index; + } + meta.end_log_index = next_log_index; + meta.size_bytes = size_bytes; + } + + let mut rotated = false; + if size_bytes >= self.segment_max_bytes { + rotated = true; + next_manifest.active_segment_id = next_manifest.active_segment_id.saturating_add(1); + let start = next_log_index.saturating_add(1); + next_manifest.segments.push(SegmentMeta { + id: next_manifest.active_segment_id, + start_log_index: start, + end_log_index: start.saturating_sub(1), + size_bytes: 0, + }); + } + + let persist_manifest = self.persist_manifest_each_append || rotated || should_refresh; + if persist_manifest || rotated { + if let Err(error) = state.log_store.flush() { + self.append_failures.fetch_add(1, Ordering::Relaxed); + return Err(error); + } + } + if persist_manifest { + if let Err(error) = 
self.manifest_store.write(&next_manifest) { + self.append_failures.fetch_add(1, Ordering::Relaxed); + return Err(error); + } + state.manifest_disk_stamp = read_manifest_disk_stamp(self.manifest_store.path())?; + } + + let token = CommitToken::new(epoch, next_log_index); + if rotated { + state.log_store = SegmentLogStore::open_or_create_append_with_buffer( + self + .sidecar_path + .join(segment_file_name(next_manifest.active_segment_id)), + self.append_write_buffer_bytes, + )?; + state.active_segment_size_bytes = 0; + } + state.manifest = next_manifest; + state.last_token = Some(token); + state.appends_since_manifest_refresh = state.appends_since_manifest_refresh.saturating_add(1); + self.append_successes.fetch_add(1, Ordering::Relaxed); + self + .epoch_fence + .store(state.manifest.epoch, Ordering::Release); + + Ok(token) + } + + pub fn promote_to_next_epoch(&self) -> Result { + let _sidecar_guard = self.sidecar_op_lock.lock(); + let mut state = self.state.lock(); + let epoch_changed = self.refresh_manifest_locked(&mut state)?; + if epoch_changed || state.write_fenced { + return Ok(state.manifest.epoch); + } + + let mut next_manifest = state.manifest.clone(); + next_manifest.epoch = next_manifest.epoch.saturating_add(1); + next_manifest.active_segment_id = next_manifest.active_segment_id.saturating_add(1); + next_manifest.segments.push(SegmentMeta { + id: next_manifest.active_segment_id, + start_log_index: next_manifest.head_log_index.saturating_add(1), + end_log_index: next_manifest.head_log_index, + size_bytes: 0, + }); + ensure_active_segment_metadata(&mut next_manifest); + self.manifest_store.write(&next_manifest)?; + state.manifest_disk_stamp = read_manifest_disk_stamp(self.manifest_store.path())?; + + state.log_store = SegmentLogStore::open_or_create_append_with_buffer( + self + .sidecar_path + .join(segment_file_name(next_manifest.active_segment_id)), + self.append_write_buffer_bytes, + )?; + state.active_segment_size_bytes = 0; + state.manifest = 
next_manifest; + state.last_token = None; + state.replica_progress.clear(); + clear_replica_progress(&self.sidecar_path)?; + state.write_fenced = false; + state.appends_since_manifest_refresh = 0; + self + .epoch_fence + .store(state.manifest.epoch, Ordering::Release); + Ok(state.manifest.epoch) + } + + pub fn report_replica_progress( + &self, + replica_id: &str, + epoch: u64, + applied_log_index: u64, + ) -> Result<()> { + let _sidecar_guard = self.sidecar_op_lock.lock(); + let mut state = self.state.lock(); + let epoch_changed = self.refresh_manifest_locked(&mut state)?; + if epoch_changed || state.write_fenced { + return Err(stale_primary_error()); + } + if epoch != state.manifest.epoch { + return Err(KiteError::InvalidReplication(format!( + "replica progress epoch mismatch: reported {epoch}, primary epoch {}", + state.manifest.epoch + ))); + } + + upsert_replica_progress(&self.sidecar_path, replica_id, epoch, applied_log_index)?; + state.replica_progress.insert( + replica_id.to_string(), + ReplicaProgressEntry { + epoch, + applied_log_index, + }, + ); + Ok(()) + } + + pub fn run_retention(&self) -> Result { + let _sidecar_guard = self.sidecar_op_lock.lock(); + let mut state = self.state.lock(); + let epoch_changed = self.refresh_manifest_locked(&mut state)?; + if epoch_changed || state.write_fenced { + return Err(stale_primary_error()); + } + self.refresh_replica_progress_locked(&mut state)?; + state.log_store.flush()?; + + let head = state.manifest.head_log_index; + let window_floor = head.saturating_sub(self.retention_min_entries); + let replica_floor = state + .replica_progress + .values() + .filter(|progress| progress.epoch == state.manifest.epoch) + .map(|progress| progress.applied_log_index.saturating_add(1)) + .min(); + let target_floor = window_floor + .min(replica_floor.unwrap_or(window_floor)) + .max(state.manifest.retained_floor); + + let mut next_manifest = state.manifest.clone(); + next_manifest.retained_floor = target_floor; + let retention_cutoff 
= self + .retention_min_duration + .and_then(|duration| SystemTime::now().checked_sub(duration)); + + let active_segment_id = next_manifest.active_segment_id; + let mut pruned_ids = Vec::new(); + let mut retained_segments = Vec::with_capacity(next_manifest.segments.len()); + for segment in &next_manifest.segments { + if segment.id == active_segment_id { + retained_segments.push(segment.clone()); + continue; + } + + let prune_by_index = segment.end_log_index > 0 && segment.end_log_index < target_floor; + if !prune_by_index { + retained_segments.push(segment.clone()); + continue; + } + + if !self.segment_old_enough_for_prune(segment.id, retention_cutoff)? { + retained_segments.push(segment.clone()); + continue; + } + + pruned_ids.push(segment.id); + } + next_manifest.segments = retained_segments; + ensure_active_segment_metadata(&mut next_manifest); + + self.manifest_store.write(&next_manifest)?; + state.manifest_disk_stamp = read_manifest_disk_stamp(self.manifest_store.path())?; + state.manifest = next_manifest; + state.appends_since_manifest_refresh = 0; + + for id in &pruned_ids { + let segment_path = self.sidecar_path.join(segment_file_name(*id)); + if segment_path.exists() { + std::fs::remove_file(&segment_path)?; + } + } + + Ok(PrimaryRetentionOutcome { + pruned_segments: pruned_ids.len(), + retained_floor: target_floor, + }) + } + + pub fn last_token(&self) -> Option { + self.state.lock().last_token + } + + pub fn status(&self) -> PrimaryReplicationStatus { + let state = self.state.lock(); + let mut replica_lags: Vec = state + .replica_progress + .iter() + .map(|(replica_id, progress)| ReplicaLagStatus { + replica_id: replica_id.clone(), + epoch: progress.epoch, + applied_log_index: progress.applied_log_index, + }) + .collect(); + replica_lags.sort_by(|left, right| left.replica_id.cmp(&right.replica_id)); + + PrimaryReplicationStatus { + role: ReplicationRole::Primary, + epoch: state.manifest.epoch, + head_log_index: state.manifest.head_log_index, + 
retained_floor: state.manifest.retained_floor, + replica_lags, + sidecar_path: self.sidecar_path.clone(), + last_token: state.last_token, + append_attempts: self.append_attempts.load(Ordering::Relaxed), + append_failures: self.append_failures.load(Ordering::Relaxed), + append_successes: self.append_successes.load(Ordering::Relaxed), + } + } + + pub fn flush_for_transport_export(&self) -> Result<()> { + let _sidecar_guard = self.sidecar_op_lock.lock(); + let mut state = self.state.lock(); + state.log_store.flush() + } + + fn refresh_manifest_locked(&self, state: &mut PrimaryReplicationState) -> Result { + let disk_stamp = read_manifest_disk_stamp(self.manifest_store.path())?; + if disk_stamp == state.manifest_disk_stamp { + return Ok(false); + } + + let mut persisted = self.manifest_store.read()?; + ensure_active_segment_metadata(&mut persisted); + + let epoch_changed = persisted.epoch != state.manifest.epoch; + let active_changed = persisted.active_segment_id != state.manifest.active_segment_id; + state.manifest_disk_stamp = disk_stamp; + + if epoch_changed { + state.write_fenced = true; + state.manifest = persisted; + self + .epoch_fence + .store(state.manifest.epoch, Ordering::Release); + if active_changed { + state.log_store = SegmentLogStore::open_or_create_append_with_buffer( + self + .sidecar_path + .join(segment_file_name(state.manifest.active_segment_id)), + self.append_write_buffer_bytes, + )?; + state.active_segment_size_bytes = segment_file_len( + &self + .sidecar_path + .join(segment_file_name(state.manifest.active_segment_id)), + )?; + } + return Ok(true); + } + + if self.persist_manifest_each_append { + state.manifest = persisted; + if active_changed { + state.log_store = SegmentLogStore::open_or_create_append_with_buffer( + self + .sidecar_path + .join(segment_file_name(state.manifest.active_segment_id)), + self.append_write_buffer_bytes, + )?; + state.active_segment_size_bytes = segment_file_len( + &self + .sidecar_path + 
.join(segment_file_name(state.manifest.active_segment_id)), + )?; + } + return Ok(false); + } + + if active_changed { + state.write_fenced = true; + state.manifest = persisted; + self + .epoch_fence + .store(state.manifest.epoch, Ordering::Release); + state.log_store = SegmentLogStore::open_or_create_append_with_buffer( + self + .sidecar_path + .join(segment_file_name(state.manifest.active_segment_id)), + self.append_write_buffer_bytes, + )?; + state.active_segment_size_bytes = segment_file_len( + &self + .sidecar_path + .join(segment_file_name(state.manifest.active_segment_id)), + )?; + return Ok(false); + } + + if persisted.retained_floor > state.manifest.retained_floor { + state.manifest.retained_floor = persisted.retained_floor; + } + + Ok(false) + } + + fn refresh_replica_progress_locked(&self, state: &mut PrimaryReplicationState) -> Result<()> { + state.replica_progress = load_replica_progress(&self.sidecar_path)?; + Ok(()) + } + + fn segment_old_enough_for_prune( + &self, + segment_id: u64, + retention_cutoff: Option, + ) -> Result { + let Some(cutoff) = retention_cutoff else { + return Ok(true); + }; + + let segment_path = self.sidecar_path.join(segment_file_name(segment_id)); + let metadata = match std::fs::metadata(&segment_path) { + Ok(metadata) => metadata, + Err(error) if error.kind() == ErrorKind::NotFound => return Ok(true), + Err(error) => return Err(error.into()), + }; + + let modified = match metadata.modified() { + Ok(modified) => modified, + Err(_) => return Ok(false), + }; + + Ok(modified <= cutoff) + } +} + +pub fn default_replication_sidecar_path(db_path: &Path) -> PathBuf { + let file_name = db_path + .file_name() + .map(|name| format!("{}.replication", name.to_string_lossy())) + .unwrap_or_else(|| "replication-sidecar".to_string()); + + match db_path.parent() { + Some(parent) => parent.join(file_name), + None => PathBuf::from(file_name), + } +} + +fn ensure_active_segment_metadata(manifest: &mut ReplicationManifest) { + let active_id = 
manifest.active_segment_id; + if manifest.segments.iter().any(|entry| entry.id == active_id) { + return; + } + + let start = manifest.head_log_index.saturating_add(1); + manifest.segments.push(SegmentMeta { + id: active_id, + start_log_index: start, + end_log_index: start.saturating_sub(1), + size_bytes: 0, + }); +} + +fn segment_file_name(id: u64) -> String { + format!("segment-{id:020}.rlog") +} + +fn reconcile_manifest_head_from_active_segment( + sidecar_path: &Path, + manifest: &mut ReplicationManifest, +) -> Result { + let segment_path = sidecar_path.join(segment_file_name(manifest.active_segment_id)); + if !segment_path.exists() { + return Ok(false); + } + + let (_, _, last_seen) = + SegmentLogStore::open(&segment_path)?.read_filtered_from_offset(0, |_| false, 0)?; + let Some((segment_epoch, segment_head_log_index)) = last_seen else { + return Ok(false); + }; + + if segment_epoch != manifest.epoch || segment_head_log_index <= manifest.head_log_index { + return Ok(false); + } + + manifest.head_log_index = segment_head_log_index; + if let Some(active_segment) = manifest + .segments + .iter_mut() + .find(|entry| entry.id == manifest.active_segment_id) + { + if active_segment.end_log_index < segment_head_log_index { + active_segment.end_log_index = segment_head_log_index; + } + if active_segment.start_log_index > active_segment.end_log_index { + active_segment.start_log_index = active_segment.end_log_index; + } + active_segment.size_bytes = segment_file_len(&segment_path)?; + } + + ensure_active_segment_metadata(manifest); + Ok(true) +} + +fn stale_primary_error() -> KiteError { + KiteError::InvalidReplication("stale primary is fenced for writes".to_string()) +} + +fn read_manifest_disk_stamp(path: &Path) -> Result { + let metadata = std::fs::metadata(path)?; + let modified_unix_nanos = metadata + .modified() + .ok() + .and_then(|value| value.duration_since(UNIX_EPOCH).ok()) + .map(|value| value.as_nanos()); + + Ok(ManifestDiskStamp { + len: metadata.len(), + 
modified_unix_nanos, + }) +} + +fn segment_file_len(path: &Path) -> Result { + match std::fs::metadata(path) { + Ok(metadata) => Ok(metadata.len()), + Err(error) if error.kind() == ErrorKind::NotFound => Ok(0), + Err(error) => Err(error.into()), + } +} + +fn sidecar_operation_lock(sidecar_path: &Path) -> SidecarOpLock { + let registry = SIDECAR_LOCKS.get_or_init(|| StdMutex::new(HashMap::new())); + let mut registry = registry.lock().expect("sidecar lock registry poisoned"); + registry + .entry(sidecar_path.to_path_buf()) + .or_insert_with(|| Arc::new(Mutex::new(()))) + .clone() +} + +fn acquire_sidecar_primary_lock(sidecar_path: &Path) -> Result { + let key = normalize_sidecar_path(sidecar_path); + let registry = SIDECAR_PRIMARY_LOCKS.get_or_init(|| StdMutex::new(HashMap::new())); + let mut registry = registry + .lock() + .map_err(|_| KiteError::LockFailed("primary sidecar lock registry poisoned".to_string()))?; + + if let Some(existing) = registry.get(&key).and_then(Weak::upgrade) { + return Ok(existing); + } + + let lock_path = key.join(PRIMARY_LOCK_FILE_NAME); + let lock_file = OpenOptions::new() + .create(true) + .read(true) + .write(true) + .open(&lock_path)?; + lock_file.try_lock_exclusive().map_err(|error| { + KiteError::LockFailed(format!( + "primary sidecar lock is held by another process: {} ({error})", + lock_path.display() + )) + })?; + + let lock = Arc::new(PrimarySidecarProcessLock { _file: lock_file }); + registry.insert(key, Arc::downgrade(&lock)); + Ok(lock) +} + +fn sidecar_epoch_fence(sidecar_path: &Path, initial_epoch: u64) -> SidecarEpochFence { + let key = normalize_sidecar_path(sidecar_path); + let registry = SIDECAR_EPOCH_FENCES.get_or_init(|| StdMutex::new(HashMap::new())); + let mut registry = registry + .lock() + .expect("sidecar epoch fence registry poisoned"); + let entry = registry + .entry(key) + .or_insert_with(|| Arc::downgrade(&Arc::new(AtomicU64::new(initial_epoch)))); + let fence = if let Some(existing) = entry.upgrade() { + 
existing + } else { + let created = Arc::new(AtomicU64::new(initial_epoch)); + *entry = Arc::downgrade(&created); + created + }; + fence.fetch_max(initial_epoch, Ordering::AcqRel); + fence +} + +fn normalize_sidecar_path(path: &Path) -> PathBuf { + std::fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf()) +} diff --git a/ray-rs/src/replication/progress.rs b/ray-rs/src/replication/progress.rs new file mode 100644 index 0000000..2e979ae --- /dev/null +++ b/ray-rs/src/replication/progress.rs @@ -0,0 +1,151 @@ +//! Replica progress persistence shared by primary and replicas. + +use crate::error::{KiteError, Result}; +use fs2::FileExt; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::fs::{self, OpenOptions}; +use std::io::Write; +use std::path::{Path, PathBuf}; + +const REPLICA_PROGRESS_FILE_NAME: &str = "replica-progress.json"; +const REPLICA_PROGRESS_LOCK_FILE_NAME: &str = "replica-progress.lock"; +const REPLICA_PROGRESS_VERSION: u32 = 1; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ReplicaProgress { + pub epoch: u64, + pub applied_log_index: u64, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct ReplicaProgressEnvelope { + version: u32, + #[serde(default)] + replicas: HashMap, +} + +pub fn load_replica_progress(sidecar_path: &Path) -> Result> { + std::fs::create_dir_all(sidecar_path)?; + with_progress_lock(sidecar_path, || { + read_progress_file(&progress_file_path(sidecar_path)) + }) +} + +pub fn upsert_replica_progress( + sidecar_path: &Path, + replica_id: &str, + epoch: u64, + applied_log_index: u64, +) -> Result<()> { + std::fs::create_dir_all(sidecar_path)?; + with_progress_lock(sidecar_path, || { + let file_path = progress_file_path(sidecar_path); + let mut progress = read_progress_file(&file_path)?; + progress.insert( + replica_id.to_string(), + ReplicaProgress { + epoch, + applied_log_index, + }, + ); + write_progress_file(&file_path, &progress) + }) +} + +pub fn 
clear_replica_progress(sidecar_path: &Path) -> Result<()> { + std::fs::create_dir_all(sidecar_path)?; + with_progress_lock(sidecar_path, || { + write_progress_file(&progress_file_path(sidecar_path), &HashMap::new()) + }) +} + +fn progress_file_path(sidecar_path: &Path) -> PathBuf { + sidecar_path.join(REPLICA_PROGRESS_FILE_NAME) +} + +fn lock_file_path(sidecar_path: &Path) -> PathBuf { + sidecar_path.join(REPLICA_PROGRESS_LOCK_FILE_NAME) +} + +fn read_progress_file(path: &Path) -> Result> { + if !path.exists() { + return Ok(HashMap::new()); + } + + let bytes = fs::read(path)?; + let envelope: ReplicaProgressEnvelope = serde_json::from_slice(&bytes).map_err(|error| { + KiteError::Serialization(format!("decode replica progress envelope: {error}")) + })?; + + if envelope.version != REPLICA_PROGRESS_VERSION { + return Err(KiteError::VersionMismatch { + required: envelope.version, + current: REPLICA_PROGRESS_VERSION, + }); + } + + Ok(envelope.replicas) +} + +fn write_progress_file(path: &Path, progress: &HashMap) -> Result<()> { + let envelope = ReplicaProgressEnvelope { + version: REPLICA_PROGRESS_VERSION, + replicas: progress.clone(), + }; + let bytes = serde_json::to_vec(&envelope).map_err(|error| { + KiteError::Serialization(format!("encode replica progress envelope: {error}")) + })?; + + let temp_path = temp_file_path(path); + let mut file = OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(&temp_path)?; + file.write_all(&bytes)?; + file.sync_all()?; + fs::rename(&temp_path, path)?; + sync_parent_dir(path.parent())?; + Ok(()) +} + +fn temp_file_path(path: &Path) -> PathBuf { + match path.extension().and_then(|extension| extension.to_str()) { + Some(extension) => path.with_extension(format!("{extension}.tmp")), + None => path.with_extension("tmp"), + } +} + +fn with_progress_lock(sidecar_path: &Path, f: impl FnOnce() -> Result) -> Result { + let lock_file = OpenOptions::new() + .create(true) + .read(true) + .write(true) + 
.open(lock_file_path(sidecar_path))?; + lock_file.lock_exclusive()?; + + let result = f(); + let unlock_result = fs2::FileExt::unlock(&lock_file); + match (result, unlock_result) { + (Ok(value), Ok(())) => Ok(value), + (Ok(_), Err(error)) => Err(error.into()), + (Err(error), _) => Err(error), + } +} + +fn sync_parent_dir(parent: Option<&Path>) -> Result<()> { + #[cfg(unix)] + { + if let Some(parent) = parent { + std::fs::File::open(parent)?.sync_all()?; + } + } + + #[cfg(not(unix))] + { + let _ = parent; + } + + Ok(()) +} diff --git a/ray-rs/src/replication/replica.rs b/ray-rs/src/replication/replica.rs new file mode 100644 index 0000000..57b604e --- /dev/null +++ b/ray-rs/src/replication/replica.rs @@ -0,0 +1,509 @@ +//! Replica-side bootstrap/pull/apply orchestration support. + +use super::log_store::{ReplicationFrame, SegmentLogStore}; +use super::manifest::{ManifestStore, ReplicationManifest}; +use super::primary::default_replication_sidecar_path; +use super::progress::upsert_replica_progress; +use super::types::ReplicationRole; +use crate::error::{KiteError, Result}; +use parking_lot::Mutex; +use serde::{Deserialize, Serialize}; +use std::fs::OpenOptions; +use std::io::Write; +use std::path::{Path, PathBuf}; + +const MANIFEST_FILE_NAME: &str = "manifest.json"; +const CURSOR_FILE_NAME: &str = "replica-cursor.json"; +const TRANSIENT_MISSING_RESEED_ATTEMPTS: u32 = 8; + +#[derive(Debug, Clone)] +pub struct ReplicaReplicationStatus { + pub role: ReplicationRole, + pub source_db_path: Option, + pub source_sidecar_path: Option, + pub applied_epoch: u64, + pub applied_log_index: u64, + pub last_error: Option, + pub needs_reseed: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(default)] +struct ReplicaCursorState { + applied_epoch: u64, + applied_log_index: u64, + last_error: Option, + needs_reseed: bool, + transient_missing_attempts: u32, + transient_missing_epoch: u64, + transient_missing_log_index: u64, +} + +#[derive(Debug, Clone, 
Copy)] +struct SegmentScanHint { + epoch: u64, + segment_id: u64, + next_offset: u64, + next_log_index: u64, +} + +#[derive(Debug)] +pub struct ReplicaReplication { + local_sidecar_path: PathBuf, + cursor_state_path: PathBuf, + replica_id: String, + source_db_path: Option, + source_sidecar_path: Option, + state: Mutex, + scan_hint: Mutex>, +} + +impl ReplicaReplication { + pub fn open( + replica_db_path: &Path, + local_sidecar_path: Option, + source_db_path: Option, + source_sidecar_path: Option, + ) -> Result { + let local_sidecar_path = + local_sidecar_path.unwrap_or_else(|| default_replication_sidecar_path(replica_db_path)); + std::fs::create_dir_all(&local_sidecar_path)?; + let replica_id = normalize_path_for_compare(&local_sidecar_path) + .to_string_lossy() + .to_string(); + + let cursor_state_path = local_sidecar_path.join(CURSOR_FILE_NAME); + let state = load_cursor_state(&cursor_state_path)?; + + let source_db_path = source_db_path.ok_or_else(|| { + KiteError::InvalidReplication("replica source db path is not configured".to_string()) + })?; + if !source_db_path.exists() { + return Err(KiteError::InvalidReplication(format!( + "replica source db path does not exist: {}", + source_db_path.display() + ))); + } + if source_db_path.is_dir() { + return Err(KiteError::InvalidReplication(format!( + "replica source db path must be a file: {}", + source_db_path.display() + ))); + } + if paths_equivalent(replica_db_path, &source_db_path) { + return Err(KiteError::InvalidReplication( + "replica source db path must differ from replica db path".to_string(), + )); + } + + let source_sidecar_path = + source_sidecar_path.or_else(|| Some(default_replication_sidecar_path(&source_db_path))); + if let Some(path) = source_sidecar_path.as_ref() { + if path.exists() && !path.is_dir() { + return Err(KiteError::InvalidReplication(format!( + "replica source sidecar path must be a directory: {}", + path.display() + ))); + } + if paths_equivalent(path, &local_sidecar_path) { + return 
Err(KiteError::InvalidReplication( + "replica source sidecar path must differ from local sidecar path".to_string(), + )); + } + } + + Ok(Self { + local_sidecar_path, + cursor_state_path, + replica_id, + source_db_path: Some(source_db_path), + source_sidecar_path, + state: Mutex::new(state), + scan_hint: Mutex::new(None), + }) + } + + pub fn source_db_path(&self) -> Option { + self.source_db_path.clone() + } + + pub fn source_sidecar_path(&self) -> Option { + self.source_sidecar_path.clone() + } + + pub fn applied_position(&self) -> (u64, u64) { + let state = self.state.lock(); + (state.applied_epoch, state.applied_log_index) + } + + pub fn source_head_position(&self) -> Result<(u64, u64)> { + let source_sidecar_path = self.source_sidecar_path.as_ref().ok_or_else(|| { + KiteError::InvalidReplication("replica source sidecar path is not configured".to_string()) + })?; + + let manifest = ManifestStore::new(source_sidecar_path.join(MANIFEST_FILE_NAME)).read()?; + Ok((manifest.epoch, manifest.head_log_index)) + } + + pub fn mark_applied(&self, epoch: u64, log_index: u64) -> Result<()> { + let mut state = self.state.lock(); + + if state.applied_epoch > epoch + || (state.applied_epoch == epoch && state.applied_log_index > log_index) + { + return Err(KiteError::InvalidReplication(format!( + "attempted to move replica cursor backwards: {}:{} -> {}:{}", + state.applied_epoch, state.applied_log_index, epoch, log_index + ))); + } + + let mut next_state = state.clone(); + next_state.applied_epoch = epoch; + next_state.applied_log_index = log_index; + next_state.last_error = None; + next_state.needs_reseed = false; + clear_transient_missing_state(&mut next_state); + persist_cursor_state(&self.cursor_state_path, &next_state)?; + *state = next_state; + drop(state); + self.report_source_progress(epoch, log_index) + } + + pub fn mark_error(&self, message: impl Into, needs_reseed: bool) -> Result<()> { + let mut state = self.state.lock(); + let mut next_state = state.clone(); + 
next_state.last_error = Some(message.into()); + next_state.needs_reseed = needs_reseed; + clear_transient_missing_state(&mut next_state); + persist_cursor_state(&self.cursor_state_path, &next_state)?; + *state = next_state; + Ok(()) + } + + pub fn clear_error(&self) -> Result<()> { + let mut state = self.state.lock(); + if state.last_error.is_none() && !state.needs_reseed && state.transient_missing_attempts == 0 { + return Ok(()); + } + let mut next_state = state.clone(); + next_state.last_error = None; + next_state.needs_reseed = false; + clear_transient_missing_state(&mut next_state); + persist_cursor_state(&self.cursor_state_path, &next_state)?; + *state = next_state; + Ok(()) + } + + pub fn status(&self) -> ReplicaReplicationStatus { + let state = self.state.lock(); + ReplicaReplicationStatus { + role: ReplicationRole::Replica, + source_db_path: self.source_db_path.clone(), + source_sidecar_path: self.source_sidecar_path.clone(), + applied_epoch: state.applied_epoch, + applied_log_index: state.applied_log_index, + last_error: state.last_error.clone(), + needs_reseed: state.needs_reseed, + } + } + + pub fn frames_after( + &self, + max_frames: usize, + include_last_applied: bool, + ) -> Result> { + let source_sidecar_path = self.source_sidecar_path.as_ref().ok_or_else(|| { + KiteError::InvalidReplication("replica source sidecar path is not configured".to_string()) + })?; + + let (applied_epoch, applied_log_index) = self.applied_position(); + let manifest = ManifestStore::new(source_sidecar_path.join(MANIFEST_FILE_NAME)).read()?; + let expected_next_log = applied_log_index.saturating_add(1); + if expected_next_log < manifest.retained_floor { + let message = format!( + "replica needs reseed: applied log {} is below retained floor {}", + applied_log_index, manifest.retained_floor + ); + self.mark_error(message.clone(), true)?; + return Err(KiteError::InvalidReplication(message)); + } + + let mut scan_hint = self.scan_hint.lock(); + let filtered = read_frames_after( 
+ source_sidecar_path, + &manifest, + applied_epoch, + applied_log_index, + include_last_applied, + max_frames, + &mut scan_hint, + )?; + + if let Some(first) = filtered.first() { + if first.log_index > expected_next_log { + let detail = format!( + "missing log range {}..{}", + expected_next_log, + first.log_index.saturating_sub(1) + ); + return self.transient_gap_error(applied_epoch, expected_next_log, detail); + } + } + + if filtered.is_empty() && manifest.head_log_index > applied_log_index { + let detail = format!( + "applied log {} but primary head is {} and required frames are unavailable", + applied_log_index, manifest.head_log_index + ); + return self.transient_gap_error(applied_epoch, expected_next_log, detail); + } + + Ok(filtered) + } + + pub fn local_sidecar_path(&self) -> &Path { + &self.local_sidecar_path + } + + fn report_source_progress(&self, epoch: u64, log_index: u64) -> Result<()> { + if let Some(source_sidecar_path) = self.source_sidecar_path.as_ref() { + upsert_replica_progress(source_sidecar_path, &self.replica_id, epoch, log_index)?; + } + Ok(()) + } + + fn transient_gap_error( + &self, + applied_epoch: u64, + expected_next_log: u64, + detail: String, + ) -> Result> { + let mut state = self.state.lock(); + let mut next_state = state.clone(); + if next_state.transient_missing_epoch != applied_epoch + || next_state.transient_missing_log_index != expected_next_log + { + next_state.transient_missing_attempts = 0; + next_state.transient_missing_epoch = applied_epoch; + next_state.transient_missing_log_index = expected_next_log; + } + next_state.transient_missing_attempts = next_state.transient_missing_attempts.saturating_add(1); + let attempts = next_state.transient_missing_attempts; + let needs_reseed = attempts >= TRANSIENT_MISSING_RESEED_ATTEMPTS; + let error_message = if needs_reseed { + format!("replica needs reseed: {detail}") + } else { + format!( + "replica missing frames after {}:{} ({detail}); transient retry {attempts}/{}", + 
applied_epoch, expected_next_log, TRANSIENT_MISSING_RESEED_ATTEMPTS + ) + }; + next_state.last_error = Some(error_message.clone()); + next_state.needs_reseed = needs_reseed; + if needs_reseed { + clear_transient_missing_state(&mut next_state); + } + persist_cursor_state(&self.cursor_state_path, &next_state)?; + *state = next_state; + Err(KiteError::InvalidReplication(error_message)) + } +} + +fn load_cursor_state(path: &Path) -> Result { + if !path.exists() { + return Ok(ReplicaCursorState::default()); + } + + let bytes = std::fs::read(path)?; + let state: ReplicaCursorState = serde_json::from_slice(&bytes).map_err(|error| { + KiteError::Serialization(format!("decode replica cursor state failed: {error}")) + })?; + Ok(state) +} + +fn persist_cursor_state(path: &Path, state: &ReplicaCursorState) -> Result<()> { + let tmp_path = path.with_extension("json.tmp"); + let bytes = serde_json::to_vec(state).map_err(|error| { + KiteError::Serialization(format!("encode replica cursor state failed: {error}")) + })?; + + let mut file = OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(&tmp_path)?; + file.write_all(&bytes)?; + file.sync_all()?; + std::fs::rename(&tmp_path, path)?; + sync_parent_dir(path.parent())?; + Ok(()) +} + +fn sync_parent_dir(parent: Option<&Path>) -> Result<()> { + #[cfg(unix)] + { + if let Some(parent) = parent { + std::fs::File::open(parent)?.sync_all()?; + } + } + + #[cfg(windows)] + { + if let Some(parent) = parent { + use std::os::windows::fs::OpenOptionsExt; + + const FILE_FLAG_BACKUP_SEMANTICS: u32 = 0x02000000; + let directory = OpenOptions::new() + .read(true) + .custom_flags(FILE_FLAG_BACKUP_SEMANTICS) + .open(parent)?; + directory.sync_all()?; + } + } + + #[cfg(not(any(unix, windows)))] + { + let _ = parent; + } + + Ok(()) +} + +fn clear_transient_missing_state(state: &mut ReplicaCursorState) { + state.transient_missing_attempts = 0; + state.transient_missing_epoch = 0; + state.transient_missing_log_index = 0; +} + +fn 
read_frames_after( + sidecar_path: &Path, + manifest: &ReplicationManifest, + applied_epoch: u64, + applied_log_index: u64, + include_last_applied: bool, + max_frames: usize, + scan_hint: &mut Option, +) -> Result> { + let minimum_log_index = if include_last_applied && applied_log_index > 0 { + applied_log_index + } else { + applied_log_index.saturating_add(1) + }; + + let mut segments = manifest.segments.clone(); + segments.sort_by_key(|segment| segment.id); + + let mut frames = Vec::new(); + for segment in segments { + if segment.end_log_index > 0 && segment.end_log_index < minimum_log_index { + continue; + } + + let segment_path = sidecar_path.join(segment_file_name(segment.id)); + if !segment_path.exists() { + continue; + } + + let remaining = if max_frames > 0 { + max_frames.saturating_sub(frames.len()) + } else { + usize::MAX + }; + if remaining == 0 { + break; + } + + let start_offset = scan_hint + .as_ref() + .filter(|hint| { + hint.epoch == manifest.epoch + && hint.segment_id == segment.id + && hint.next_log_index <= minimum_log_index + }) + .map(|hint| hint.next_offset) + .unwrap_or(0); + + let (segment_frames, next_offset, last_seen) = SegmentLogStore::open(&segment_path)? 
+ .read_filtered_from_offset( + start_offset, + |frame| { + frame_is_after_applied( + frame, + applied_epoch, + applied_log_index, + include_last_applied, + ) + }, + remaining, + )?; + + if let Some((last_epoch, last_log_index)) = last_seen { + *scan_hint = Some(SegmentScanHint { + epoch: last_epoch, + segment_id: segment.id, + next_offset, + next_log_index: last_log_index.saturating_add(1), + }); + } + frames.extend(segment_frames); + + if max_frames > 0 && frames.len() >= max_frames { + break; + } + } + + if frames.len() > 1 { + frames.sort_by(|left, right| { + left + .epoch + .cmp(&right.epoch) + .then_with(|| left.log_index.cmp(&right.log_index)) + }); + } + + if max_frames > 0 && frames.len() > max_frames { + frames.truncate(max_frames); + } + + Ok(frames) +} + +fn frame_is_after_applied( + frame: &ReplicationFrame, + applied_epoch: u64, + applied_log_index: u64, + include_last_applied: bool, +) -> bool { + if frame.epoch > applied_epoch { + return true; + } + if frame.epoch < applied_epoch { + return false; + } + + if include_last_applied && applied_log_index > 0 { + frame.log_index >= applied_log_index + } else { + frame.log_index > applied_log_index + } +} + +fn segment_file_name(id: u64) -> String { + format!("segment-{id:020}.rlog") +} + +fn normalize_path_for_compare(path: &Path) -> PathBuf { + let absolute = if path.is_absolute() { + path.to_path_buf() + } else { + match std::env::current_dir() { + Ok(cwd) => cwd.join(path), + Err(_) => path.to_path_buf(), + } + }; + std::fs::canonicalize(&absolute).unwrap_or(absolute) +} + +fn paths_equivalent(left: &Path, right: &Path) -> bool { + normalize_path_for_compare(left) == normalize_path_for_compare(right) +} diff --git a/ray-rs/src/replication/token.rs b/ray-rs/src/replication/token.rs new file mode 100644 index 0000000..6a2ee1a --- /dev/null +++ b/ray-rs/src/replication/token.rs @@ -0,0 +1,3 @@ +//! Token helpers. 
+ +pub use super::types::CommitToken; diff --git a/ray-rs/src/replication/transport.rs b/ray-rs/src/replication/transport.rs new file mode 100644 index 0000000..cffb259 --- /dev/null +++ b/ray-rs/src/replication/transport.rs @@ -0,0 +1,88 @@ +//! Transport payloads for pull/push replication. + +use crate::error::{KiteError, Result}; +use byteorder::{LittleEndian, ReadBytesExt}; +use std::io::{Cursor, Read}; + +const COMMIT_PAYLOAD_MAGIC: &[u8; 4] = b"RPL1"; +const COMMIT_PAYLOAD_HEADER_BYTES: usize = 16; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CommitFramePayload { + pub txid: u64, + pub wal_bytes: Vec, +} + +pub fn build_commit_payload_header( + txid: u64, + wal_len: usize, +) -> Result<[u8; COMMIT_PAYLOAD_HEADER_BYTES]> { + let wal_len = u32::try_from(wal_len).map_err(|_| { + KiteError::InvalidReplication(format!("replication commit payload too large: {}", wal_len)) + })?; + + let mut bytes = [0u8; COMMIT_PAYLOAD_HEADER_BYTES]; + bytes[..4].copy_from_slice(COMMIT_PAYLOAD_MAGIC); + bytes[4..12].copy_from_slice(&txid.to_le_bytes()); + bytes[12..16].copy_from_slice(&wal_len.to_le_bytes()); + Ok(bytes) +} + +pub fn encode_commit_frame_payload(txid: u64, wal_bytes: &[u8]) -> Result> { + let header = build_commit_payload_header(txid, wal_bytes.len())?; + let mut bytes = Vec::with_capacity(COMMIT_PAYLOAD_HEADER_BYTES + wal_bytes.len()); + bytes.extend_from_slice(&header); + bytes.extend_from_slice(wal_bytes); + Ok(bytes) +} + +pub fn decode_commit_frame_payload(payload: &[u8]) -> Result { + if payload.len() < COMMIT_PAYLOAD_HEADER_BYTES { + return Err(KiteError::InvalidReplication( + "replication commit payload too short".to_string(), + )); + } + + if &payload[..4] != COMMIT_PAYLOAD_MAGIC { + return Err(KiteError::InvalidReplication( + "replication commit payload has invalid magic".to_string(), + )); + } + + let mut cursor = Cursor::new(&payload[4..]); + let txid = cursor.read_u64::()?; + let wal_len = cursor.read_u32::()? 
as usize; + + let mut wal_bytes = vec![0; wal_len]; + cursor + .read_exact(&mut wal_bytes) + .map_err(|_| KiteError::InvalidReplication("replication payload truncated".to_string()))?; + + if cursor.position() as usize != payload.len() - 4 { + return Err(KiteError::InvalidReplication( + "replication payload contains unexpected trailing bytes".to_string(), + )); + } + + Ok(CommitFramePayload { txid, wal_bytes }) +} + +#[cfg(test)] +mod tests { + use super::{decode_commit_frame_payload, encode_commit_frame_payload}; + + #[test] + fn roundtrip_commit_payload() { + let bytes = encode_commit_frame_payload(77, b"abc").expect("encode"); + let decoded = decode_commit_frame_payload(&bytes).expect("decode"); + assert_eq!(decoded.txid, 77); + assert_eq!(decoded.wal_bytes, b"abc"); + } + + #[test] + fn rejects_bad_magic() { + let mut bytes = encode_commit_frame_payload(1, b"x").expect("encode"); + bytes[0] = b'X'; + assert!(decode_commit_frame_payload(&bytes).is_err()); + } +} diff --git a/ray-rs/src/replication/types.rs b/ray-rs/src/replication/types.rs new file mode 100644 index 0000000..5831acd --- /dev/null +++ b/ray-rs/src/replication/types.rs @@ -0,0 +1,238 @@ +//! Replication token/cursor types. 
+ +use serde::{Deserialize, Serialize}; +use std::cmp::Ordering; +use std::fmt; +use std::str::FromStr; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)] +pub enum ReplicationRole { + #[default] + Disabled, + Primary, + Replica, +} + +impl fmt::Display for ReplicationRole { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let value = match self { + ReplicationRole::Disabled => "disabled", + ReplicationRole::Primary => "primary", + ReplicationRole::Replica => "replica", + }; + write!(f, "{value}") + } +} + +impl FromStr for ReplicationRole { + type Err = ReplicationParseError; + + fn from_str(raw: &str) -> Result { + match raw { + "disabled" => Ok(Self::Disabled), + "primary" => Ok(Self::Primary), + "replica" => Ok(Self::Replica), + _ => Err(ReplicationParseError::new(format!( + "invalid replication role: {raw}" + ))), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ReplicationParseError { + message: String, +} + +impl ReplicationParseError { + fn new(message: impl Into) -> Self { + Self { + message: message.into(), + } + } +} + +impl fmt::Display for ReplicationParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.message) + } +} + +impl std::error::Error for ReplicationParseError {} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct CommitToken { + pub epoch: u64, + pub log_index: u64, +} + +impl CommitToken { + pub const fn new(epoch: u64, log_index: u64) -> Self { + Self { epoch, log_index } + } +} + +impl fmt::Display for CommitToken { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}:{}", self.epoch, self.log_index) + } +} + +impl Ord for CommitToken { + fn cmp(&self, other: &Self) -> Ordering { + self + .epoch + .cmp(&other.epoch) + .then_with(|| self.log_index.cmp(&other.log_index)) + } +} + +impl PartialOrd for CommitToken { + fn partial_cmp(&self, other: &Self) -> Option { + 
Some(self.cmp(other)) + } +} + +impl FromStr for CommitToken { + type Err = ReplicationParseError; + + fn from_str(raw: &str) -> Result { + let mut parts = raw.split(':'); + let epoch = parse_u64_component(parts.next(), "epoch", raw)?; + let log_index = parse_u64_component(parts.next(), "log_index", raw)?; + + if parts.next().is_some() { + return Err(ReplicationParseError::new(format!( + "invalid token format: {raw}" + ))); + } + + Ok(Self::new(epoch, log_index)) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct ReplicationCursor { + pub epoch: u64, + pub segment_id: u64, + pub segment_offset: u64, + pub log_index: u64, +} + +impl ReplicationCursor { + pub const fn new(epoch: u64, segment_id: u64, segment_offset: u64, log_index: u64) -> Self { + Self { + epoch, + segment_id, + segment_offset, + log_index, + } + } +} + +impl fmt::Display for ReplicationCursor { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}:{}:{}:{}", + self.epoch, self.segment_id, self.segment_offset, self.log_index + ) + } +} + +impl Ord for ReplicationCursor { + fn cmp(&self, other: &Self) -> Ordering { + self + .epoch + .cmp(&other.epoch) + .then_with(|| self.log_index.cmp(&other.log_index)) + .then_with(|| self.segment_id.cmp(&other.segment_id)) + .then_with(|| self.segment_offset.cmp(&other.segment_offset)) + } +} + +impl PartialOrd for ReplicationCursor { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl FromStr for ReplicationCursor { + type Err = ReplicationParseError; + + fn from_str(raw: &str) -> Result { + let mut parts = raw.split(':'); + + let epoch = parse_u64_component(parts.next(), "epoch", raw)?; + let segment_id = parse_u64_component(parts.next(), "segment_id", raw)?; + let segment_offset = parse_u64_component(parts.next(), "segment_offset", raw)?; + let log_index = parse_u64_component(parts.next(), "log_index", raw)?; + + if parts.next().is_some() { + return 
Err(ReplicationParseError::new(format!( + "invalid cursor format: {raw}" + ))); + } + + Ok(Self::new(epoch, segment_id, segment_offset, log_index)) + } +} + +fn parse_u64_component( + value: Option<&str>, + component: &'static str, + original: &str, +) -> Result { + let value = value.ok_or_else(|| { + ReplicationParseError::new(format!( + "invalid replication identifier ({component} missing): {original}" + )) + })?; + + if value.is_empty() || !value.bytes().all(|byte| byte.is_ascii_digit()) { + return Err(ReplicationParseError::new(format!( + "invalid {component}: {value}" + ))); + } + + value.parse::().map_err(|_| { + ReplicationParseError::new(format!( + "invalid replication identifier ({component} overflow): {original}" + )) + }) +} + +#[cfg(test)] +mod tests { + use super::{CommitToken, ReplicationCursor}; + use rand::{rngs::StdRng, Rng, SeedableRng}; + use std::str::FromStr; + + #[test] + fn token_roundtrip_fuzz_like() { + let mut rng = StdRng::seed_from_u64(0xdecafbad); + + for _ in 0..2_000 { + let token = CommitToken::new(rng.gen_range(0..10_000), rng.gen_range(0..10_000_000)); + let parsed = CommitToken::from_str(&token.to_string()).expect("parse token"); + assert_eq!(parsed, token); + } + } + + #[test] + fn cursor_roundtrip_fuzz_like() { + let mut rng = StdRng::seed_from_u64(0xabba_cafe); + + for _ in 0..2_000 { + let cursor = ReplicationCursor::new( + rng.gen_range(0..1024), + rng.gen_range(0..4096), + rng.gen_range(0..1_000_000), + rng.gen_range(0..10_000_000), + ); + + let parsed = ReplicationCursor::from_str(&cursor.to_string()).expect("parse cursor"); + assert_eq!(parsed, cursor); + } + } +} diff --git a/ray-rs/src/types.rs b/ray-rs/src/types.rs index 1de08e1..4e63426 100644 --- a/ray-rs/src/types.rs +++ b/ray-rs/src/types.rs @@ -54,6 +54,7 @@ bitflags::bitflags! 
{ const HAS_EDGE_BLOOM = 1 << 3; // future const HAS_NODE_LABELS = 1 << 4; const HAS_VECTORS = 1 << 5; + const HAS_VECTOR_STORES = 1 << 6; } } @@ -115,12 +116,15 @@ pub enum SectionId { NodeLabelIds = 24, VectorOffsets = 25, VectorData = 26, + VectorStoreIndex = 27, + VectorStoreData = 28, } impl SectionId { pub const COUNT_V1: usize = 23; pub const COUNT_V2: usize = 25; - pub const COUNT: usize = 27; + pub const COUNT_V3: usize = 27; + pub const COUNT: usize = 29; pub fn from_u32(v: u32) -> Option { match v { @@ -151,6 +155,8 @@ impl SectionId { 24 => Some(Self::NodeLabelIds), 25 => Some(Self::VectorOffsets), 26 => Some(Self::VectorData), + 27 => Some(Self::VectorStoreIndex), + 28 => Some(Self::VectorStoreData), _ => None, } } diff --git a/ray-rs/src/vector/compaction.rs b/ray-rs/src/vector/compaction.rs index 3c2b049..fa24055 100644 --- a/ray-rs/src/vector/compaction.rs +++ b/ray-rs/src/vector/compaction.rs @@ -513,4 +513,32 @@ mod tests { let did_compact = run_compaction_if_needed(&mut manifest, &strategy); assert!(!did_compact); } + + #[test] + fn test_compaction_preserves_live_vector_count() { + let mut manifest = create_test_manifest(4); + + for i in 0..200 { + let vector = vec![1.0 + i as f32, 2.0, 3.0, 4.0]; + vector_store_insert(&mut manifest, i, &vector).expect("expected value"); + } + vector_store_seal_active(&mut manifest); + + for i in 0..80 { + vector_store_delete(&mut manifest, i); + } + + let live_before = manifest.live_count(); + let strategy = CompactionStrategy { + min_deletion_ratio: 0.2, + max_fragments_per_compaction: 4, + min_vectors_to_compact: 1, + }; + assert!(run_compaction_if_needed(&mut manifest, &strategy)); + assert_eq!( + manifest.live_count(), + live_before, + "compaction must not change logical live vector count", + ); + } } diff --git a/ray-rs/src/vector/types.rs b/ray-rs/src/vector/types.rs index 3e4e5ce..819ce7b 100644 --- a/ray-rs/src/vector/types.rs +++ b/ray-rs/src/vector/types.rs @@ -359,7 +359,7 @@ impl VectorManifest { /// 
Get live vector count pub fn live_count(&self) -> usize { - self.total_vectors - self.total_deleted + self.node_to_vector.len() } } diff --git a/ray-rs/tests/replication_faults_phase_d.rs b/ray-rs/tests/replication_faults_phase_d.rs new file mode 100644 index 0000000..7f538e1 --- /dev/null +++ b/ray-rs/tests/replication_faults_phase_d.rs @@ -0,0 +1,308 @@ +use kitedb::core::single_file::{close_single_file, open_single_file, SingleFileOpenOptions}; +use kitedb::replication::types::ReplicationRole; + +fn open_primary( + path: &std::path::Path, + sidecar: &std::path::Path, +) -> kitedb::Result { + open_single_file( + path, + SingleFileOpenOptions::new() + .replication_role(ReplicationRole::Primary) + .replication_sidecar_path(sidecar) + .replication_segment_max_bytes(1024 * 1024) + .replication_retention_min_entries(128), + ) +} + +fn open_primary_with_segment_limit( + path: &std::path::Path, + sidecar: &std::path::Path, + segment_max_bytes: u64, +) -> kitedb::Result { + open_single_file( + path, + SingleFileOpenOptions::new() + .replication_role(ReplicationRole::Primary) + .replication_sidecar_path(sidecar) + .replication_segment_max_bytes(segment_max_bytes) + .replication_retention_min_entries(128), + ) +} + +fn open_replica( + replica_path: &std::path::Path, + source_db_path: &std::path::Path, + local_sidecar: &std::path::Path, + source_sidecar: &std::path::Path, +) -> kitedb::Result { + open_single_file( + replica_path, + SingleFileOpenOptions::new() + .replication_role(ReplicationRole::Replica) + .replication_sidecar_path(local_sidecar) + .replication_source_db_path(source_db_path) + .replication_source_sidecar_path(source_sidecar), + ) +} + +fn active_segment_path(sidecar: &std::path::Path) -> std::path::PathBuf { + sidecar.join("segment-00000000000000000001.rlog") +} + +#[test] +fn corrupt_segment_sets_replica_last_error() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("fault-corrupt-primary.kitedb"); + let 
primary_sidecar = dir.path().join("fault-corrupt-primary.sidecar"); + let replica_path = dir.path().join("fault-corrupt-replica.kitedb"); + let replica_sidecar = dir.path().join("fault-corrupt-replica.sidecar"); + + let primary = open_primary(&primary_path, &primary_sidecar).expect("open primary"); + primary.begin(false).expect("begin base"); + primary.create_node(Some("base")).expect("create base"); + primary + .commit_with_token() + .expect("commit base") + .expect("token base"); + + let replica = open_replica( + &replica_path, + &primary_path, + &replica_sidecar, + &primary_sidecar, + ) + .expect("open replica"); + replica + .replica_bootstrap_from_snapshot() + .expect("bootstrap snapshot"); + + primary.begin(false).expect("begin c1"); + primary.create_node(Some("c1")).expect("create c1"); + primary + .commit_with_token() + .expect("commit c1") + .expect("token c1"); + close_single_file(primary).expect("close primary"); + + let segment_path = active_segment_path(&primary_sidecar); + let mut bytes = std::fs::read(&segment_path).expect("read segment"); + bytes[31] ^= 0xFF; + std::fs::write(&segment_path, &bytes).expect("write corrupted segment"); + + let err = replica + .replica_catch_up_once(32) + .expect_err("corrupted segment must fail catch-up"); + assert!( + err.to_string().contains("CRC mismatch"), + "unexpected corruption error: {err}" + ); + let status = replica.replica_replication_status().expect("status"); + assert!(status.last_error.is_some(), "last_error must be persisted"); + assert!(!status.needs_reseed); + + close_single_file(replica).expect("close replica"); +} + +#[test] +fn truncated_segment_sets_replica_last_error() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("fault-truncated-primary.kitedb"); + let primary_sidecar = dir.path().join("fault-truncated-primary.sidecar"); + let replica_path = dir.path().join("fault-truncated-replica.kitedb"); + let replica_sidecar = 
dir.path().join("fault-truncated-replica.sidecar"); + + let primary = open_primary(&primary_path, &primary_sidecar).expect("open primary"); + primary.begin(false).expect("begin base"); + primary.create_node(Some("base")).expect("create base"); + primary + .commit_with_token() + .expect("commit base") + .expect("token base"); + + let replica = open_replica( + &replica_path, + &primary_path, + &replica_sidecar, + &primary_sidecar, + ) + .expect("open replica"); + replica + .replica_bootstrap_from_snapshot() + .expect("bootstrap snapshot"); + + primary.begin(false).expect("begin c1"); + primary.create_node(Some("c1")).expect("create c1"); + primary + .commit_with_token() + .expect("commit c1") + .expect("token c1"); + close_single_file(primary).expect("close primary"); + + let segment_path = active_segment_path(&primary_sidecar); + let mut bytes = std::fs::read(&segment_path).expect("read segment"); + bytes.truncate(bytes.len().saturating_sub(1)); + std::fs::write(&segment_path, &bytes).expect("write truncated segment"); + + let err = replica + .replica_catch_up_once(32) + .expect_err("truncated segment must fail catch-up"); + assert!( + err.to_string().contains("truncated replication segment"), + "unexpected truncation error: {err}" + ); + let status = replica.replica_replication_status().expect("status"); + assert!(status.last_error.is_some(), "last_error must be persisted"); + assert!(!status.needs_reseed); + + close_single_file(replica).expect("close replica"); +} + +#[test] +fn obsolete_corrupt_segment_does_not_break_incremental_catch_up() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("fault-obsolete-corrupt-primary.kitedb"); + let primary_sidecar = dir.path().join("fault-obsolete-corrupt-primary.sidecar"); + let replica_path = dir.path().join("fault-obsolete-corrupt-replica.kitedb"); + let replica_sidecar = dir.path().join("fault-obsolete-corrupt-replica.sidecar"); + + let primary = + 
open_primary_with_segment_limit(&primary_path, &primary_sidecar, 1).expect("open primary"); + primary.begin(false).expect("begin base"); + primary.create_node(Some("base")).expect("create base"); + primary + .commit_with_token() + .expect("commit base") + .expect("token base"); + + let replica = open_replica( + &replica_path, + &primary_path, + &replica_sidecar, + &primary_sidecar, + ) + .expect("open replica"); + replica + .replica_bootstrap_from_snapshot() + .expect("bootstrap snapshot"); + + for i in 0..5 { + primary.begin(false).expect("begin seed"); + primary + .create_node(Some(&format!("seed-{i}"))) + .expect("create seed"); + primary + .commit_with_token() + .expect("commit seed") + .expect("token seed"); + } + + let initial = replica + .replica_catch_up_once(128) + .expect("initial catch-up"); + assert!(initial > 0, "replica must establish applied cursor"); + + let oldest_segment = active_segment_path(&primary_sidecar); + let mut bytes = std::fs::read(&oldest_segment).expect("read oldest segment"); + bytes[0] ^= 0xFF; + std::fs::write(&oldest_segment, &bytes).expect("corrupt obsolete segment"); + + primary.begin(false).expect("begin tail"); + primary.create_node(Some("tail")).expect("create tail"); + primary + .commit_with_token() + .expect("commit tail") + .expect("token tail"); + + let pulled = replica + .replica_catch_up_once(8) + .expect("catch-up should ignore obsolete corruption"); + assert!(pulled > 0, "replica must still pull newest frames"); + assert!( + !replica + .replica_replication_status() + .expect("replica status") + .needs_reseed + ); + + close_single_file(replica).expect("close replica"); + close_single_file(primary).expect("close primary"); +} + +#[cfg(unix)] +#[test] +fn cursor_persist_failure_does_not_advance_in_memory_position() { + use std::os::unix::fs::PermissionsExt; + + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("fault-cursor-persist-primary.kitedb"); + let primary_sidecar = 
dir.path().join("fault-cursor-persist-primary.sidecar"); + let replica_path = dir.path().join("fault-cursor-persist-replica.kitedb"); + let replica_sidecar = dir.path().join("fault-cursor-persist-replica.sidecar"); + + let primary = open_primary(&primary_path, &primary_sidecar).expect("open primary"); + primary.begin(false).expect("begin base"); + primary.create_node(Some("base")).expect("create base"); + primary + .commit_with_token() + .expect("commit base") + .expect("token base"); + + let replica = open_replica( + &replica_path, + &primary_path, + &replica_sidecar, + &primary_sidecar, + ) + .expect("open replica"); + replica + .replica_bootstrap_from_snapshot() + .expect("bootstrap snapshot"); + + primary.begin(false).expect("begin c1"); + primary.create_node(Some("c1")).expect("create c1"); + primary + .commit_with_token() + .expect("commit c1") + .expect("token c1"); + + let before = replica + .replica_replication_status() + .expect("status before persist failure"); + + let original_mode = std::fs::metadata(&replica_sidecar) + .expect("replica sidecar metadata") + .permissions() + .mode(); + std::fs::set_permissions(&replica_sidecar, std::fs::Permissions::from_mode(0o555)) + .expect("set read-only sidecar permissions"); + + let err = replica + .replica_catch_up_once(32) + .expect_err("cursor persist failure must fail catch-up"); + assert!( + err.to_string().contains("cursor persist failed"), + "unexpected cursor persist failure: {err}" + ); + + let after = replica + .replica_replication_status() + .expect("status after persist failure"); + assert_eq!( + after.applied_log_index, before.applied_log_index, + "in-memory applied log index must not advance when cursor persistence fails" + ); + assert_eq!( + after.applied_epoch, before.applied_epoch, + "in-memory applied epoch must not advance when cursor persistence fails" + ); + + std::fs::set_permissions( + &replica_sidecar, + std::fs::Permissions::from_mode(original_mode), + ) + .expect("restore sidecar 
permissions"); + + close_single_file(replica).expect("close replica"); + close_single_file(primary).expect("close primary"); +} diff --git a/ray-rs/tests/replication_metrics_phase_d.rs b/ray-rs/tests/replication_metrics_phase_d.rs new file mode 100644 index 0000000..ed405d7 --- /dev/null +++ b/ray-rs/tests/replication_metrics_phase_d.rs @@ -0,0 +1,2333 @@ +use std::collections::HashMap; +use std::io::{Read, Write}; +use std::net::TcpListener; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::mpsc; +use std::sync::Mutex; +use std::thread; +use std::time::{Duration, Instant, SystemTime}; + +use kitedb::core::single_file::{close_single_file, open_single_file, SingleFileOpenOptions}; +use kitedb::metrics::{ + collect_metrics_single_file, collect_replication_metrics_otel_json_single_file, + collect_replication_metrics_otel_protobuf_single_file, + collect_replication_metrics_prometheus_single_file, push_replication_metrics_otel_grpc_payload, + push_replication_metrics_otel_grpc_payload_with_options, + push_replication_metrics_otel_json_payload, + push_replication_metrics_otel_json_payload_with_options, + push_replication_metrics_otel_protobuf_payload, render_replication_metrics_prometheus, + OtlpAdaptiveRetryMode, OtlpHttpPushOptions, OtlpHttpTlsOptions, +}; +use kitedb::replication::types::ReplicationRole; +use opentelemetry_proto::tonic::collector::metrics::v1::metrics_service_server::{ + MetricsService as OtelMetricsService, MetricsServiceServer as OtelMetricsServiceServer, +}; +use opentelemetry_proto::tonic::collector::metrics::v1::ExportMetricsServiceRequest as OtelExportMetricsServiceRequest; +use opentelemetry_proto::tonic::collector::metrics::v1::ExportMetricsServiceResponse as OtelExportMetricsServiceResponse; +use prost::Message; + +fn open_primary( + path: &std::path::Path, + sidecar: &std::path::Path, + segment_max_bytes: u64, + retention_min_entries: u64, +) -> kitedb::Result { + open_single_file( + path, + SingleFileOpenOptions::new() + 
.replication_role(ReplicationRole::Primary) + .replication_sidecar_path(sidecar) + .replication_segment_max_bytes(segment_max_bytes) + .replication_retention_min_entries(retention_min_entries), + ) +} + +fn open_replica( + replica_path: &std::path::Path, + source_db_path: &std::path::Path, + local_sidecar: &std::path::Path, + source_sidecar: &std::path::Path, +) -> kitedb::Result { + open_single_file( + replica_path, + SingleFileOpenOptions::new() + .replication_role(ReplicationRole::Replica) + .replication_sidecar_path(local_sidecar) + .replication_source_db_path(source_db_path) + .replication_source_sidecar_path(source_sidecar), + ) +} + +#[derive(Debug)] +struct CapturedHttpRequest { + request_line: String, + headers: HashMap, + body: Vec, +} + +#[derive(Debug)] +struct CapturedGrpcRequest { + authorization: Option, + resource_metrics_count: usize, + attempt: usize, +} + +#[derive(Debug)] +struct TestGrpcMetricsService { + tx: Mutex>>, + fail_first_attempts: usize, + attempts: AtomicUsize, +} + +#[tonic::async_trait] +impl OtelMetricsService for TestGrpcMetricsService { + async fn export( + &self, + request: tonic::Request, + ) -> std::result::Result, tonic::Status> { + let attempt = self.attempts.fetch_add(1, Ordering::SeqCst) + 1; + if attempt <= self.fail_first_attempts { + return Err(tonic::Status::unavailable("transient")); + } + let authorization = request + .metadata() + .get("authorization") + .and_then(|value| value.to_str().ok()) + .map(ToOwned::to_owned); + if let Some(sender) = self.tx.lock().expect("lock capture sender").take() { + sender + .send(CapturedGrpcRequest { + authorization, + resource_metrics_count: request.get_ref().resource_metrics.len(), + attempt, + }) + .expect("send grpc capture"); + } + Ok(tonic::Response::new(OtelExportMetricsServiceResponse { + partial_success: None, + })) + } +} + +fn find_subsequence(haystack: &[u8], needle: &[u8]) -> Option { + haystack + .windows(needle.len()) + .position(|window| window == needle) +} + +fn 
spawn_http_capture_server( + status_code: u16, + response_body: &str, +) -> ( + String, + mpsc::Receiver, + thread::JoinHandle<()>, +) { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind test server"); + let address = listener.local_addr().expect("local addr"); + let endpoint = format!("http://{address}/v1/metrics"); + let response_body = response_body.to_string(); + let (tx, rx) = mpsc::channel::(); + + let handle = thread::spawn(move || { + let (mut stream, _) = listener.accept().expect("accept"); + stream + .set_read_timeout(Some(Duration::from_secs(2))) + .expect("set read timeout"); + + let mut buffer = Vec::new(); + let mut chunk = [0u8; 1024]; + let mut header_end: Option = None; + let mut content_length = 0usize; + + loop { + match stream.read(&mut chunk) { + Ok(0) => break, + Ok(read) => { + buffer.extend_from_slice(&chunk[..read]); + + if header_end.is_none() { + if let Some(position) = find_subsequence(&buffer, b"\r\n\r\n") { + let end = position + 4; + header_end = Some(end); + let headers_text = String::from_utf8_lossy(&buffer[..end]); + for line in headers_text.lines().skip(1) { + let Some((name, value)) = line.split_once(':') else { + continue; + }; + if name.eq_ignore_ascii_case("content-length") { + content_length = value.trim().parse::().unwrap_or(0); + } + } + } + } + + if let Some(end) = header_end { + if buffer.len() >= end + content_length { + break; + } + } + } + Err(error) => panic!("read request failed: {error}"), + } + } + + let end = header_end.expect("header terminator"); + let headers_text = String::from_utf8_lossy(&buffer[..end]); + let mut lines = headers_text.lines(); + let request_line = lines.next().unwrap_or_default().to_string(); + let mut headers = HashMap::new(); + for line in lines { + let Some((name, value)) = line.split_once(':') else { + continue; + }; + headers.insert(name.trim().to_ascii_lowercase(), value.trim().to_string()); + } + + let body_end = (end + content_length).min(buffer.len()); + let body = 
buffer[end..body_end].to_vec(); + tx.send(CapturedHttpRequest { + request_line, + headers, + body, + }) + .expect("send captured request"); + + let reason = if status_code == 200 { "OK" } else { "ERR" }; + let response = format!( + "HTTP/1.1 {status_code} {reason}\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + response_body.len(), + response_body + ); + stream + .write_all(response.as_bytes()) + .expect("write response"); + }); + + (endpoint, rx, handle) +} + +fn spawn_http_sequence_capture_server( + status_codes: Vec, + response_body: &str, +) -> ( + String, + mpsc::Receiver>, + thread::JoinHandle<()>, +) { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind sequence test server"); + let address = listener.local_addr().expect("sequence local addr"); + let endpoint = format!("http://{address}/v1/metrics"); + let response_body = response_body.to_string(); + let (tx, rx) = mpsc::channel::>(); + + let handle = thread::spawn(move || { + let mut captured = Vec::new(); + for status_code in status_codes { + let (mut stream, _) = listener.accept().expect("accept sequence"); + stream + .set_read_timeout(Some(Duration::from_secs(2))) + .expect("set read timeout"); + + let mut buffer = Vec::new(); + let mut chunk = [0u8; 1024]; + let mut header_end: Option = None; + let mut content_length = 0usize; + + loop { + match stream.read(&mut chunk) { + Ok(0) => break, + Ok(read) => { + buffer.extend_from_slice(&chunk[..read]); + if header_end.is_none() { + if let Some(position) = find_subsequence(&buffer, b"\r\n\r\n") { + let end = position + 4; + header_end = Some(end); + let headers_text = String::from_utf8_lossy(&buffer[..end]); + for line in headers_text.lines().skip(1) { + let Some((name, value)) = line.split_once(':') else { + continue; + }; + if name.eq_ignore_ascii_case("content-length") { + content_length = value.trim().parse::().unwrap_or(0); + } + } + } + } + if let Some(end) = header_end { + if buffer.len() >= end + content_length { + break; + } + } + 
} + Err(error) => panic!("read sequence request failed: {error}"), + } + } + + let end = header_end.expect("header terminator"); + let headers_text = String::from_utf8_lossy(&buffer[..end]); + let mut lines = headers_text.lines(); + let request_line = lines.next().unwrap_or_default().to_string(); + let mut headers = HashMap::new(); + for line in lines { + let Some((name, value)) = line.split_once(':') else { + continue; + }; + headers.insert(name.trim().to_ascii_lowercase(), value.trim().to_string()); + } + let body_end = (end + content_length).min(buffer.len()); + let body = buffer[end..body_end].to_vec(); + captured.push(CapturedHttpRequest { + request_line, + headers, + body, + }); + + let reason = if status_code == 200 { "OK" } else { "ERR" }; + let response = format!( + "HTTP/1.1 {status_code} {reason}\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + response_body.len(), + response_body + ); + stream + .write_all(response.as_bytes()) + .expect("write sequence response"); + } + tx.send(captured).expect("send sequence captures"); + }); + + (endpoint, rx, handle) +} + +fn spawn_state_store_get_server(state_body: String) -> (String, thread::JoinHandle<()>) { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind state store"); + let address = listener.local_addr().expect("state store local addr"); + let endpoint = format!("http://{address}/breaker-state"); + let handle = thread::spawn(move || { + let (mut stream, _) = listener.accept().expect("accept state store"); + stream + .set_read_timeout(Some(Duration::from_secs(2))) + .expect("set state store read timeout"); + let mut buffer = Vec::new(); + let mut chunk = [0u8; 512]; + loop { + match stream.read(&mut chunk) { + Ok(0) => break, + Ok(read) => { + buffer.extend_from_slice(&chunk[..read]); + if find_subsequence(&buffer, b"\r\n\r\n").is_some() { + break; + } + } + Err(error) => panic!("read state store request failed: {error}"), + } + } + let request_text = String::from_utf8_lossy(&buffer); + 
assert!( + request_text.starts_with("GET /breaker-state HTTP/1.1"), + "unexpected state store request: {request_text}" + ); + let response = format!( + "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + state_body.len(), + state_body + ); + stream + .write_all(response.as_bytes()) + .expect("write state store response"); + }); + (endpoint, handle) +} + +fn spawn_state_store_roundtrip_server() -> (String, thread::JoinHandle<()>) { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind state store roundtrip"); + let address = listener + .local_addr() + .expect("state store roundtrip local addr"); + let endpoint = format!("http://{address}/breaker-state"); + let handle = thread::spawn(move || { + let mut stored_state = "{}".to_string(); + for expected_method in ["GET", "GET", "PUT", "GET"] { + let (mut stream, _) = listener.accept().expect("accept state store roundtrip"); + stream + .set_read_timeout(Some(Duration::from_secs(2))) + .expect("set state store roundtrip read timeout"); + + let mut buffer = Vec::new(); + let mut chunk = [0u8; 1024]; + let mut header_end: Option = None; + let mut content_length = 0usize; + loop { + match stream.read(&mut chunk) { + Ok(0) => break, + Ok(read) => { + buffer.extend_from_slice(&chunk[..read]); + if header_end.is_none() { + if let Some(position) = find_subsequence(&buffer, b"\r\n\r\n") { + let end = position + 4; + header_end = Some(end); + let headers_text = String::from_utf8_lossy(&buffer[..end]); + for line in headers_text.lines().skip(1) { + let Some((name, value)) = line.split_once(':') else { + continue; + }; + if name.eq_ignore_ascii_case("content-length") { + content_length = value.trim().parse::().unwrap_or(0); + } + } + } + } + if let Some(end) = header_end { + if buffer.len() >= end + content_length { + break; + } + } + } + Err(error) => panic!("read state store roundtrip request failed: {error}"), + } + } + + let end = header_end.expect("state store 
roundtrip header terminator"); + let request_text = String::from_utf8_lossy(&buffer[..end]); + let request_line = request_text.lines().next().unwrap_or_default(); + assert!( + request_line.starts_with(&format!("{expected_method} /breaker-state HTTP/1.1")), + "unexpected state store roundtrip request line: {request_line}" + ); + + if expected_method == "PUT" { + let body_end = (end + content_length).min(buffer.len()); + stored_state = String::from_utf8_lossy(&buffer[end..body_end]).to_string(); + } + + let response_body = if expected_method == "GET" { + stored_state.clone() + } else { + String::new() + }; + let response = format!( + "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + response_body.len(), + response_body + ); + stream + .write_all(response.as_bytes()) + .expect("write state store roundtrip response"); + } + }); + (endpoint, handle) +} + +fn spawn_state_store_cas_lease_server(expected_lease: String) -> (String, thread::JoinHandle<()>) { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind state store cas lease"); + let address = listener + .local_addr() + .expect("state store cas lease local addr"); + let endpoint = format!("http://{address}/breaker-state"); + let handle = thread::spawn(move || { + for expected_method in ["GET", "GET", "PUT"] { + let (mut stream, _) = listener.accept().expect("accept state store cas lease"); + stream + .set_read_timeout(Some(Duration::from_secs(2))) + .expect("set state store cas lease read timeout"); + + let mut buffer = Vec::new(); + let mut chunk = [0u8; 1024]; + let mut header_end: Option = None; + let mut content_length = 0usize; + loop { + match stream.read(&mut chunk) { + Ok(0) => break, + Ok(read) => { + buffer.extend_from_slice(&chunk[..read]); + if header_end.is_none() { + if let Some(position) = find_subsequence(&buffer, b"\r\n\r\n") { + let end = position + 4; + header_end = Some(end); + let headers_text = 
String::from_utf8_lossy(&buffer[..end]); + for line in headers_text.lines().skip(1) { + let Some((name, value)) = line.split_once(':') else { + continue; + }; + if name.eq_ignore_ascii_case("content-length") { + content_length = value.trim().parse::().unwrap_or(0); + } + } + } + } + if let Some(end) = header_end { + if buffer.len() >= end + content_length { + break; + } + } + } + Err(error) => panic!("read state store cas lease request failed: {error}"), + } + } + + let end = header_end.expect("state store cas lease header terminator"); + let request_text = String::from_utf8_lossy(&buffer[..end]); + let request_line = request_text.lines().next().unwrap_or_default(); + assert!( + request_line.starts_with(&format!("{expected_method} /breaker-state HTTP/1.1")), + "unexpected state store cas lease request line: {request_line}" + ); + + let mut headers = HashMap::new(); + for line in request_text.lines().skip(1) { + let Some((name, value)) = line.split_once(':') else { + continue; + }; + headers.insert(name.trim().to_ascii_lowercase(), value.trim().to_string()); + } + + let lease_header = headers + .get("x-kitedb-breaker-lease") + .map(String::as_str) + .unwrap_or_default(); + assert_eq!( + lease_header, + expected_lease.as_str(), + "lease header mismatch" + ); + + if expected_method == "PUT" { + let if_match = headers + .get("if-match") + .map(String::as_str) + .unwrap_or_default(); + assert_eq!(if_match, "v1", "if-match header mismatch"); + } + + let (status_line, etag, body) = if expected_method == "PUT" { + ("HTTP/1.1 200 OK", "v2", "") + } else { + ("HTTP/1.1 200 OK", "v1", "{}") + }; + let response = format!( + "{status_line}\r\nContent-Type: application/json\r\nETag: {etag}\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{body}", + body.len() + ); + stream + .write_all(response.as_bytes()) + .expect("write state store cas lease response"); + } + }); + (endpoint, handle) +} + +fn spawn_state_store_patch_server(expected_key: String) -> (String, 
thread::JoinHandle<()>) { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind state store patch"); + let address = listener.local_addr().expect("state store patch local addr"); + let endpoint = format!("http://{address}/breaker-state"); + let handle = thread::spawn(move || { + for expected_method in ["GET", "GET", "PATCH"] { + let (mut stream, _) = listener.accept().expect("accept state store patch"); + stream + .set_read_timeout(Some(Duration::from_secs(2))) + .expect("set state store patch read timeout"); + + let mut buffer = Vec::new(); + let mut chunk = [0u8; 1024]; + let mut header_end: Option = None; + let mut content_length = 0usize; + loop { + match stream.read(&mut chunk) { + Ok(0) => break, + Ok(read) => { + buffer.extend_from_slice(&chunk[..read]); + if header_end.is_none() { + if let Some(position) = find_subsequence(&buffer, b"\r\n\r\n") { + let end = position + 4; + header_end = Some(end); + let headers_text = String::from_utf8_lossy(&buffer[..end]); + for line in headers_text.lines().skip(1) { + let Some((name, value)) = line.split_once(':') else { + continue; + }; + if name.eq_ignore_ascii_case("content-length") { + content_length = value.trim().parse::().unwrap_or(0); + } + } + } + } + if let Some(end) = header_end { + if buffer.len() >= end + content_length { + break; + } + } + } + Err(error) => panic!("read state store patch request failed: {error}"), + } + } + + let end = header_end.expect("state store patch header terminator"); + let request_text = String::from_utf8_lossy(&buffer[..end]); + let request_line = request_text.lines().next().unwrap_or_default(); + assert!( + request_line.starts_with(&format!("{expected_method} /breaker-state HTTP/1.1")), + "unexpected state store patch request line: {request_line}" + ); + + let mut headers = HashMap::new(); + for line in request_text.lines().skip(1) { + let Some((name, value)) = line.split_once(':') else { + continue; + }; + headers.insert(name.trim().to_ascii_lowercase(), 
value.trim().to_string()); + } + + assert_eq!( + headers.get("x-kitedb-breaker-mode").map(String::as_str), + Some("patch-v1"), + "patch mode header mismatch" + ); + assert_eq!( + headers.get("x-kitedb-breaker-key").map(String::as_str), + Some(expected_key.as_str()), + "patch key header mismatch" + ); + + if expected_method == "PATCH" { + let body_end = (end + content_length).min(buffer.len()); + let payload: serde_json::Value = + serde_json::from_slice(&buffer[end..body_end]).expect("parse patch payload"); + assert_eq!(payload["key"].as_str(), Some(expected_key.as_str())); + assert!(payload["state"].is_object(), "missing patch state object"); + } + + let (status_line, etag, body) = if expected_method == "PATCH" { + ("HTTP/1.1 200 OK", "p2", "") + } else { + ("HTTP/1.1 200 OK", "p1", "{}") + }; + let response = format!( + "{status_line}\r\nContent-Type: application/json\r\nETag: {etag}\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{body}", + body.len() + ); + stream + .write_all(response.as_bytes()) + .expect("write state store patch response"); + } + }); + (endpoint, handle) +} + +fn spawn_state_store_patch_retry_server(expected_key: String) -> (String, thread::JoinHandle<()>) { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind state store patch retry"); + let address = listener + .local_addr() + .expect("state store patch retry local addr"); + let endpoint = format!("http://{address}/breaker-state"); + let handle = thread::spawn(move || { + let mut patch_attempts = 0usize; + for expected_method in ["GET", "GET", "PATCH", "PATCH"] { + let (mut stream, _) = listener.accept().expect("accept state store patch retry"); + stream + .set_read_timeout(Some(Duration::from_secs(2))) + .expect("set state store patch retry read timeout"); + + let mut buffer = Vec::new(); + let mut chunk = [0u8; 1024]; + let mut header_end: Option = None; + let mut content_length = 0usize; + loop { + match stream.read(&mut chunk) { + Ok(0) => break, + Ok(read) => { + 
buffer.extend_from_slice(&chunk[..read]); + if header_end.is_none() { + if let Some(position) = find_subsequence(&buffer, b"\r\n\r\n") { + let end = position + 4; + header_end = Some(end); + let headers_text = String::from_utf8_lossy(&buffer[..end]); + for line in headers_text.lines().skip(1) { + let Some((name, value)) = line.split_once(':') else { + continue; + }; + if name.eq_ignore_ascii_case("content-length") { + content_length = value.trim().parse::().unwrap_or(0); + } + } + } + } + if let Some(end) = header_end { + if buffer.len() >= end + content_length { + break; + } + } + } + Err(error) => panic!("read state store patch retry request failed: {error}"), + } + } + + let end = header_end.expect("state store patch retry header terminator"); + let request_text = String::from_utf8_lossy(&buffer[..end]); + let request_line = request_text.lines().next().unwrap_or_default(); + assert!( + request_line.starts_with(&format!("{expected_method} /breaker-state HTTP/1.1")), + "unexpected state store patch retry request line: {request_line}" + ); + + let mut headers = HashMap::new(); + for line in request_text.lines().skip(1) { + let Some((name, value)) = line.split_once(':') else { + continue; + }; + headers.insert(name.trim().to_ascii_lowercase(), value.trim().to_string()); + } + + if expected_method == "PATCH" { + assert_eq!( + headers.get("x-kitedb-breaker-mode").map(String::as_str), + Some("patch-v1"), + "patch mode header mismatch" + ); + assert_eq!( + headers.get("x-kitedb-breaker-key").map(String::as_str), + Some(expected_key.as_str()), + "patch key header mismatch" + ); + let if_match = headers + .get("if-match") + .map(String::as_str) + .unwrap_or_default(); + patch_attempts = patch_attempts.saturating_add(1); + if patch_attempts == 1 { + assert_eq!( + if_match, "pr1", + "first patch if-match header should use GET ETag" + ); + let response = "HTTP/1.1 412 Precondition Failed\r\nETag: pr2\r\nContent-Length: 0\r\nConnection: close\r\n\r\n"; + stream + 
.write_all(response.as_bytes()) + .expect("write patch retry precondition response"); + continue; + } + if patch_attempts == 2 { + assert_eq!(if_match, "pr2", "retry if-match header mismatch"); + } + } + + let (status_line, etag, body) = if expected_method == "GET" { + ("HTTP/1.1 200 OK", "pr1", "{}") + } else { + ("HTTP/1.1 200 OK", "pr3", "") + }; + let response = format!( + "{status_line}\r\nContent-Type: application/json\r\nETag: {etag}\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{body}", + body.len() + ); + stream + .write_all(response.as_bytes()) + .expect("write state store patch retry response"); + } + }); + (endpoint, handle) +} + +fn spawn_state_store_patch_batch_server(expected_key: String) -> (String, thread::JoinHandle<()>) { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind state store patch batch"); + let address = listener + .local_addr() + .expect("state store patch batch local addr"); + let endpoint = format!("http://{address}/breaker-state"); + let handle = thread::spawn(move || { + for expected_method in ["GET", "GET", "PATCH"] { + let (mut stream, _) = listener.accept().expect("accept state store patch batch"); + stream + .set_read_timeout(Some(Duration::from_secs(2))) + .expect("set state store patch batch read timeout"); + + let mut buffer = Vec::new(); + let mut chunk = [0u8; 1024]; + let mut header_end: Option = None; + let mut content_length = 0usize; + loop { + match stream.read(&mut chunk) { + Ok(0) => break, + Ok(read) => { + buffer.extend_from_slice(&chunk[..read]); + if header_end.is_none() { + if let Some(position) = find_subsequence(&buffer, b"\r\n\r\n") { + let end = position + 4; + header_end = Some(end); + let headers_text = String::from_utf8_lossy(&buffer[..end]); + for line in headers_text.lines().skip(1) { + let Some((name, value)) = line.split_once(':') else { + continue; + }; + if name.eq_ignore_ascii_case("content-length") { + content_length = value.trim().parse::().unwrap_or(0); + } + } + } + } + if let 
Some(end) = header_end { + if buffer.len() >= end + content_length { + break; + } + } + } + Err(error) => panic!("read state store patch batch request failed: {error}"), + } + } + + let end = header_end.expect("state store patch batch header terminator"); + let request_text = String::from_utf8_lossy(&buffer[..end]); + let request_line = request_text.lines().next().unwrap_or_default(); + assert!( + request_line.starts_with(&format!("{expected_method} /breaker-state HTTP/1.1")), + "unexpected state store patch batch request line: {request_line}" + ); + + let mut headers = HashMap::new(); + for line in request_text.lines().skip(1) { + let Some((name, value)) = line.split_once(':') else { + continue; + }; + headers.insert(name.trim().to_ascii_lowercase(), value.trim().to_string()); + } + + if expected_method == "PATCH" { + assert_eq!( + headers.get("x-kitedb-breaker-mode").map(String::as_str), + Some("patch-batch-v1"), + "patch batch mode header mismatch" + ); + assert_eq!( + headers.get("x-kitedb-breaker-key").map(String::as_str), + Some(expected_key.as_str()), + "patch batch key header mismatch" + ); + let body_end = (end + content_length).min(buffer.len()); + let payload: serde_json::Value = + serde_json::from_slice(&buffer[end..body_end]).expect("parse patch batch payload"); + let updates = payload["updates"].as_array().expect("updates array"); + assert!(!updates.is_empty(), "updates must not be empty"); + assert!( + updates.len() <= 2, + "updates should respect batch max keys, got {}", + updates.len() + ); + assert_eq!( + updates[0]["key"].as_str(), + Some(expected_key.as_str()), + "primary key must be first update" + ); + } + + let (status_line, etag, body) = if expected_method == "PATCH" { + ("HTTP/1.1 200 OK", "pb2", "") + } else { + ("HTTP/1.1 200 OK", "pb1", "{}") + }; + let response = format!( + "{status_line}\r\nContent-Type: application/json\r\nETag: {etag}\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{body}", + body.len() + ); + stream + 
.write_all(response.as_bytes()) + .expect("write state store patch batch response"); + } + }); + (endpoint, handle) +} + +fn spawn_state_store_patch_merge_server( + expected_steps: Vec<(String, String)>, +) -> (String, thread::JoinHandle<()>) { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind state store patch merge"); + let address = listener + .local_addr() + .expect("state store patch merge local addr"); + let endpoint = format!("http://{address}/breaker-state"); + let handle = thread::spawn(move || { + for (expected_key, expected_patch_mode) in expected_steps { + for expected_method in ["GET", "GET", "PATCH"] { + let (mut stream, _) = listener.accept().expect("accept state store patch merge"); + stream + .set_read_timeout(Some(Duration::from_secs(2))) + .expect("set state store patch merge read timeout"); + + let mut buffer = Vec::new(); + let mut chunk = [0u8; 1024]; + let mut header_end: Option = None; + let mut content_length = 0usize; + loop { + match stream.read(&mut chunk) { + Ok(0) => break, + Ok(read) => { + buffer.extend_from_slice(&chunk[..read]); + if header_end.is_none() { + if let Some(position) = find_subsequence(&buffer, b"\r\n\r\n") { + let end = position + 4; + header_end = Some(end); + let headers_text = String::from_utf8_lossy(&buffer[..end]); + for line in headers_text.lines().skip(1) { + let Some((name, value)) = line.split_once(':') else { + continue; + }; + if name.eq_ignore_ascii_case("content-length") { + content_length = value.trim().parse::().unwrap_or(0); + } + } + } + } + if let Some(end) = header_end { + if buffer.len() >= end + content_length { + break; + } + } + } + Err(error) => panic!("read state store patch merge request failed: {error}"), + } + } + + let end = header_end.expect("state store patch merge header terminator"); + let request_text = String::from_utf8_lossy(&buffer[..end]); + let request_line = request_text.lines().next().unwrap_or_default(); + assert!( + 
request_line.starts_with(&format!("{expected_method} /breaker-state HTTP/1.1")), + "unexpected state store patch merge request line: {request_line}" + ); + + let mut headers = HashMap::new(); + for line in request_text.lines().skip(1) { + let Some((name, value)) = line.split_once(':') else { + continue; + }; + headers.insert(name.trim().to_ascii_lowercase(), value.trim().to_string()); + } + + assert_eq!( + headers.get("x-kitedb-breaker-key").map(String::as_str), + Some(expected_key.as_str()), + "patch merge key header mismatch" + ); + if expected_method == "PATCH" { + assert_eq!( + headers.get("x-kitedb-breaker-mode").map(String::as_str), + Some(expected_patch_mode.as_str()), + "patch merge mode header mismatch" + ); + let body_end = (end + content_length).min(buffer.len()); + let payload: serde_json::Value = + serde_json::from_slice(&buffer[end..body_end]).expect("parse patch merge payload"); + if expected_patch_mode == "patch-merge-v1" { + assert_eq!( + payload["scope_key"].as_str(), + Some(expected_key.as_str()), + "patch merge scope key mismatch" + ); + assert!( + payload["total_keys"].as_u64().unwrap_or_default() >= 3, + "patch merge total_keys should include all tracked keys" + ); + assert_eq!( + payload["truncated"].as_bool(), + Some(true), + "patch merge payload should mark truncation" + ); + let updates = payload["updates"].as_array().expect("updates array"); + assert!(!updates.is_empty(), "updates must not be empty"); + assert!( + updates.len() <= 2, + "updates should respect merge max keys, got {}", + updates.len() + ); + assert_eq!( + updates[0]["key"].as_str(), + Some(expected_key.as_str()), + "primary key must be first update" + ); + } else { + assert_eq!( + payload["key"].as_str(), + Some(expected_key.as_str()), + "patch key mismatch" + ); + assert!(payload["state"].is_object(), "missing patch state object"); + } + } else { + assert_eq!( + headers.get("x-kitedb-breaker-mode").map(String::as_str), + Some("patch-v1"), + "patch merge GET mode header 
mismatch" + ); + } + + let etag = if expected_method == "PATCH" { + "pm2" + } else { + "pm1" + }; + let body = "{}"; + let response = format!( + "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nETag: {etag}\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{body}", + body.len() + ); + stream + .write_all(response.as_bytes()) + .expect("write state store patch merge response"); + } + } + }); + (endpoint, handle) +} + +fn spawn_state_store_patch_merge_retry_server( + expected_key: String, +) -> (String, thread::JoinHandle<()>) { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind state store patch merge retry"); + let address = listener + .local_addr() + .expect("state store patch merge retry local addr"); + let endpoint = format!("http://{address}/breaker-state"); + let handle = thread::spawn(move || { + let mut merge_attempts = 0usize; + for expected_method in ["GET", "GET", "PATCH", "PATCH"] { + let (mut stream, _) = listener + .accept() + .expect("accept state store patch merge retry"); + stream + .set_read_timeout(Some(Duration::from_secs(2))) + .expect("set state store patch merge retry read timeout"); + + let mut buffer = Vec::new(); + let mut chunk = [0u8; 1024]; + let mut header_end: Option = None; + let mut content_length = 0usize; + loop { + match stream.read(&mut chunk) { + Ok(0) => break, + Ok(read) => { + buffer.extend_from_slice(&chunk[..read]); + if header_end.is_none() { + if let Some(position) = find_subsequence(&buffer, b"\r\n\r\n") { + let end = position + 4; + header_end = Some(end); + let headers_text = String::from_utf8_lossy(&buffer[..end]); + for line in headers_text.lines().skip(1) { + let Some((name, value)) = line.split_once(':') else { + continue; + }; + if name.eq_ignore_ascii_case("content-length") { + content_length = value.trim().parse::().unwrap_or(0); + } + } + } + } + if let Some(end) = header_end { + if buffer.len() >= end + content_length { + break; + } + } + } + Err(error) => panic!("read state store patch merge 
retry request failed: {error}"), + } + } + + let end = header_end.expect("state store patch merge retry header terminator"); + let request_text = String::from_utf8_lossy(&buffer[..end]); + let request_line = request_text.lines().next().unwrap_or_default(); + assert!( + request_line.starts_with(&format!("{expected_method} /breaker-state HTTP/1.1")), + "unexpected state store patch merge retry request line: {request_line}" + ); + + let mut headers = HashMap::new(); + for line in request_text.lines().skip(1) { + let Some((name, value)) = line.split_once(':') else { + continue; + }; + headers.insert(name.trim().to_ascii_lowercase(), value.trim().to_string()); + } + assert_eq!( + headers.get("x-kitedb-breaker-key").map(String::as_str), + Some(expected_key.as_str()), + "patch merge retry key header mismatch" + ); + if expected_method == "PATCH" { + assert_eq!( + headers.get("x-kitedb-breaker-mode").map(String::as_str), + Some("patch-merge-v1"), + "patch merge retry mode header mismatch" + ); + let if_match = headers + .get("if-match") + .map(String::as_str) + .unwrap_or_default(); + merge_attempts = merge_attempts.saturating_add(1); + if merge_attempts == 1 { + assert_eq!( + if_match, "pmr1", + "first patch-merge if-match header should use GET ETag" + ); + let response = "HTTP/1.1 412 Precondition Failed\r\nETag: pmr2\r\nContent-Length: 0\r\nConnection: close\r\n\r\n"; + stream + .write_all(response.as_bytes()) + .expect("write patch merge retry precondition response"); + continue; + } + if merge_attempts == 2 { + assert_eq!(if_match, "pmr2", "retry if-match header mismatch"); + } + } + + let (status_line, etag, body) = if expected_method == "GET" { + ("HTTP/1.1 200 OK", "pmr1", "{}") + } else { + ("HTTP/1.1 200 OK", "pmr3", "") + }; + let response = format!( + "{status_line}\r\nContent-Type: application/json\r\nETag: {etag}\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{body}", + body.len() + ); + stream + .write_all(response.as_bytes()) + .expect("write state store 
patch merge retry response"); + } + }); + (endpoint, handle) +} + +fn spawn_grpc_capture_server( + fail_first_attempts: usize, +) -> ( + String, + mpsc::Receiver, + tokio::sync::oneshot::Sender<()>, + thread::JoinHandle<()>, +) { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind grpc test server"); + let address = listener.local_addr().expect("grpc local addr"); + drop(listener); + let endpoint = format!("http://{address}"); + let (tx, rx) = mpsc::channel::(); + let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); + + let handle = thread::spawn(move || { + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("create grpc runtime"); + runtime.block_on(async move { + let service = TestGrpcMetricsService { + tx: Mutex::new(Some(tx)), + fail_first_attempts, + attempts: AtomicUsize::new(0), + }; + tonic::transport::Server::builder() + .add_service(OtelMetricsServiceServer::new(service)) + .serve_with_shutdown(address, async move { + let _ = shutdown_rx.await; + }) + .await + .expect("serve grpc test endpoint"); + }); + }); + + (endpoint, rx, shutdown_tx, handle) +} + +#[test] +fn collect_metrics_exposes_primary_replication_fields() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("replication-metrics-primary.kitedb"); + let sidecar = dir.path().join("replication-metrics-primary.sidecar"); + + let primary = open_primary(&db_path, &sidecar, 1, 2).expect("open primary"); + + for i in 0..4 { + primary.begin(false).expect("begin"); + primary + .create_node(Some(&format!("p-{i}"))) + .expect("create node"); + let _ = primary.commit_with_token().expect("commit").expect("token"); + } + + primary + .primary_report_replica_progress("replica-a", 1, 2) + .expect("report replica progress"); + + let metrics = collect_metrics_single_file(&primary); + let otel = collect_replication_metrics_otel_json_single_file(&primary); + let otel_protobuf = 
collect_replication_metrics_otel_protobuf_single_file(&primary); + let prometheus = collect_replication_metrics_prometheus_single_file(&primary); + assert!(metrics.replication.enabled); + assert_eq!(metrics.replication.role, "primary"); + assert!(metrics.replication.replica.is_none()); + + let repl = metrics + .replication + .primary + .as_ref() + .expect("primary replication metrics"); + assert_eq!(repl.epoch, 1); + assert_eq!(repl.replica_count, 1); + assert_eq!(repl.stale_epoch_replica_count, 0); + assert_eq!(repl.min_replica_applied_log_index, Some(2)); + assert_eq!(repl.max_replica_lag, repl.head_log_index.saturating_sub(2)); + assert!(repl.append_attempts >= repl.append_successes); + assert_eq!(repl.append_failures, 0); + assert!(repl.append_successes >= 4); + assert!(repl.last_token.is_some()); + assert!(repl + .sidecar_path + .ends_with("replication-metrics-primary.sidecar")); + assert!(prometheus.contains("# HELP kitedb_replication_enabled")); + assert!(prometheus.contains("kitedb_replication_enabled{role=\"primary\"} 1")); + assert!(prometheus.contains("kitedb_replication_primary_head_log_index")); + assert!(prometheus.contains("kitedb_replication_primary_append_attempts_total")); + assert!(otel.contains("\"kitedb.replication.enabled\"")); + assert!(otel.contains("\"kitedb.replication.primary.head_log_index\"")); + assert!(otel.contains("\"kitedb.replication.primary.append_attempts\"")); + let otel_json: serde_json::Value = serde_json::from_str(&otel).expect("parse otel json"); + assert!(otel_json["resourceMetrics"] + .as_array() + .map(|values| !values.is_empty()) + .unwrap_or(false)); + let otel_proto = OtelExportMetricsServiceRequest::decode(otel_protobuf.as_slice()) + .expect("decode otel protobuf request"); + assert_eq!(otel_proto.resource_metrics.len(), 1); + let metric_names = otel_proto.resource_metrics[0] + .scope_metrics + .iter() + .flat_map(|scope| scope.metrics.iter().map(|metric| metric.name.clone())) + .collect::>(); + assert!(metric_names 
+ .iter() + .any(|name| name == "kitedb.replication.enabled")); + assert!(metric_names + .iter() + .any(|name| name == "kitedb.replication.primary.head_log_index")); + assert!(metric_names + .iter() + .any(|name| name == "kitedb.replication.primary.append_attempts")); + + close_single_file(primary).expect("close primary"); +} + +#[test] +fn collect_metrics_exposes_replica_reseed_error_state() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir + .path() + .join("replication-metrics-replica-primary.kitedb"); + let primary_sidecar = dir + .path() + .join("replication-metrics-replica-primary.sidecar"); + let replica_path = dir.path().join("replication-metrics-replica.kitedb"); + let replica_sidecar = dir.path().join("replication-metrics-replica.sidecar"); + + let primary = open_primary(&primary_path, &primary_sidecar, 1, 2).expect("open primary"); + + primary.begin(false).expect("begin base"); + primary.create_node(Some("base")).expect("create base"); + primary + .commit_with_token() + .expect("commit base") + .expect("token base"); + + let replica = open_replica( + &replica_path, + &primary_path, + &replica_sidecar, + &primary_sidecar, + ) + .expect("open replica"); + replica + .replica_bootstrap_from_snapshot() + .expect("bootstrap replica"); + + for i in 0..5 { + primary.begin(false).expect("begin"); + primary + .create_node(Some(&format!("r-{i}"))) + .expect("create"); + primary.commit_with_token().expect("commit").expect("token"); + } + + let progress_path = primary_sidecar.join("replica-progress.json"); + if progress_path.exists() { + std::fs::remove_file(&progress_path).expect("remove persisted replica progress"); + } + let _ = primary.primary_run_retention().expect("run retention"); + + let err = replica + .replica_catch_up_once(32) + .expect_err("must need reseed"); + assert!(err.to_string().contains("reseed")); + + let metrics = collect_metrics_single_file(&replica); + let otel = 
collect_replication_metrics_otel_json_single_file(&replica); + let prometheus = render_replication_metrics_prometheus(&metrics); + assert!(metrics.replication.enabled); + assert_eq!(metrics.replication.role, "replica"); + assert!(metrics.replication.primary.is_none()); + + let repl = metrics + .replication + .replica + .as_ref() + .expect("replica replication metrics"); + assert!(repl.needs_reseed); + assert!( + repl + .last_error + .as_deref() + .unwrap_or_default() + .contains("reseed"), + "unexpected last_error: {:?}", + repl.last_error + ); + assert!(prometheus.contains("kitedb_replication_enabled{role=\"replica\"} 1")); + assert!(prometheus.contains("kitedb_replication_replica_needs_reseed 1")); + assert!(prometheus.contains("kitedb_replication_replica_last_error_present 1")); + assert!(otel.contains("\"kitedb.replication.replica.needs_reseed\"")); + assert!(otel.contains("\"kitedb.replication.replica.last_error_present\"")); + + close_single_file(replica).expect("close replica"); + close_single_file(primary).expect("close primary"); +} + +#[test] +fn replication_prometheus_export_reports_disabled_role() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("replication-metrics-disabled.kitedb"); + let db = open_single_file(&db_path, SingleFileOpenOptions::new()).expect("open db"); + + let metrics = collect_metrics_single_file(&db); + let otel = collect_replication_metrics_otel_json_single_file(&db); + let prometheus = render_replication_metrics_prometheus(&metrics); + assert!(!metrics.replication.enabled); + assert_eq!(metrics.replication.role, "disabled"); + assert!(prometheus.contains("kitedb_replication_enabled{role=\"disabled\"} 0")); + assert!(prometheus.contains("kitedb_replication_auth_enabled 0")); + assert!(otel.contains("\"kitedb.replication.enabled\"")); + assert!(otel.contains("\"role\"")); + assert!(otel.contains("\"disabled\"")); + + close_single_file(db).expect("close db"); +} + +#[test] +fn 
otlp_push_payload_validates_endpoint_and_timeout() { + let endpoint_err = push_replication_metrics_otel_json_payload("{}", " ", 1000, None) + .expect_err("empty endpoint must fail"); + assert!(endpoint_err.to_string().contains("endpoint")); + + let timeout_err = + push_replication_metrics_otel_json_payload("{}", "http://127.0.0.1:1/v1/metrics", 0, None) + .expect_err("zero timeout must fail"); + assert!(timeout_err.to_string().contains("timeout_ms")); +} + +#[test] +fn otlp_push_payload_posts_json_and_auth_header() { + let payload = "{\"resourceMetrics\":[]}"; + let (endpoint, captured_rx, handle) = spawn_http_capture_server(200, "ok"); + + let result = push_replication_metrics_otel_json_payload(payload, &endpoint, 2_000, Some("token")) + .expect("otlp push must succeed"); + assert_eq!(result.status_code, 200); + assert_eq!(result.response_body, "ok"); + + let captured = captured_rx + .recv_timeout(Duration::from_secs(2)) + .expect("captured request"); + assert_eq!(captured.request_line, "POST /v1/metrics HTTP/1.1"); + assert_eq!( + captured.headers.get("content-type").map(String::as_str), + Some("application/json") + ); + assert_eq!( + captured.headers.get("authorization").map(String::as_str), + Some("Bearer token") + ); + assert_eq!(String::from_utf8_lossy(&captured.body), payload); + + handle.join().expect("server thread"); +} + +#[test] +fn otlp_push_protobuf_payload_posts_binary_and_auth_header() { + let payload = vec![0x0a, 0x03, 0x66, 0x6f, 0x6f]; + let (endpoint, captured_rx, handle) = spawn_http_capture_server(200, "ok"); + + let result = + push_replication_metrics_otel_protobuf_payload(&payload, &endpoint, 2_000, Some("token")) + .expect("otlp protobuf push must succeed"); + assert_eq!(result.status_code, 200); + assert_eq!(result.response_body, "ok"); + + let captured = captured_rx + .recv_timeout(Duration::from_secs(2)) + .expect("captured request"); + assert_eq!(captured.request_line, "POST /v1/metrics HTTP/1.1"); + assert_eq!( + 
captured.headers.get("content-type").map(String::as_str), + Some("application/x-protobuf") + ); + assert_eq!( + captured.headers.get("authorization").map(String::as_str), + Some("Bearer token") + ); + assert_eq!(captured.body, payload); + + handle.join().expect("server thread"); +} + +#[test] +fn otlp_push_payload_retries_transient_http_failure() { + let payload = "{\"resourceMetrics\":[]}"; + let (endpoint, captured_rx, handle) = spawn_http_sequence_capture_server(vec![500, 200], "ok"); + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + retry_max_attempts: 2, + retry_backoff_ms: 1, + retry_backoff_max_ms: 1, + ..OtlpHttpPushOptions::default() + }; + + let result = + push_replication_metrics_otel_json_payload_with_options(payload, &endpoint, &options) + .expect("second attempt should succeed"); + assert_eq!(result.status_code, 200); + assert_eq!(result.response_body, "ok"); + + let captures = captured_rx + .recv_timeout(Duration::from_secs(2)) + .expect("captured sequence requests"); + assert_eq!(captures.len(), 2); + assert_eq!( + String::from_utf8_lossy(&captures[0].body), + payload, + "first attempt payload mismatch" + ); + assert_eq!( + String::from_utf8_lossy(&captures[1].body), + payload, + "second attempt payload mismatch" + ); + handle.join().expect("sequence server thread"); +} + +#[test] +fn otlp_push_payload_gzip_sets_header_and_compresses_body() { + let payload = "{\"resourceMetrics\":[]}"; + let (endpoint, captured_rx, handle) = spawn_http_capture_server(200, "ok"); + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + compression_gzip: true, + ..OtlpHttpPushOptions::default() + }; + + let result = + push_replication_metrics_otel_json_payload_with_options(payload, &endpoint, &options) + .expect("gzip push should succeed"); + assert_eq!(result.status_code, 200); + + let captured = captured_rx + .recv_timeout(Duration::from_secs(2)) + .expect("captured gzip request"); + assert_eq!( + 
captured.headers.get("content-encoding").map(String::as_str), + Some("gzip") + ); + assert!( + captured.body.starts_with(&[0x1f, 0x8b]), + "expected gzip magic bytes" + ); + handle.join().expect("server thread"); +} + +#[test] +fn otlp_push_payload_returns_error_on_non_success_status() { + let payload = "{\"resourceMetrics\":[]}"; + let (endpoint, _captured_rx, handle) = spawn_http_capture_server(401, "denied"); + + let error = push_replication_metrics_otel_json_payload(payload, &endpoint, 2_000, None) + .expect_err("non-2xx must fail"); + let message = error.to_string(); + assert!( + message.contains("status 401"), + "unexpected error: {message}" + ); + assert!(message.contains("denied"), "unexpected error: {message}"); + + handle.join().expect("server thread"); +} + +#[test] +fn otlp_push_payload_rejects_https_only_http_endpoint() { + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + bearer_token: None, + tls: OtlpHttpTlsOptions { + https_only: true, + ..OtlpHttpTlsOptions::default() + }, + ..OtlpHttpPushOptions::default() + }; + let error = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:4318/v1/metrics", + &options, + ) + .expect_err("https_only should reject http endpoint"); + assert!(error.to_string().contains("https")); +} + +#[test] +fn otlp_push_payload_rejects_partial_mtls_paths() { + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + bearer_token: None, + tls: OtlpHttpTlsOptions { + client_cert_pem_path: Some("/tmp/client.crt".to_string()), + client_key_pem_path: None, + ..OtlpHttpTlsOptions::default() + }, + ..OtlpHttpPushOptions::default() + }; + let error = push_replication_metrics_otel_json_payload_with_options( + "{}", + "https://127.0.0.1:4318/v1/metrics", + &options, + ) + .expect_err("partial mTLS path configuration should fail"); + assert!(error.to_string().contains("client_cert_pem_path")); + assert!(error.to_string().contains("client_key_pem_path")); +} + +#[test] +fn 
otlp_push_payload_rejects_zero_retry_attempts() { + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + retry_max_attempts: 0, + ..OtlpHttpPushOptions::default() + }; + let error = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:4318/v1/metrics", + &options, + ) + .expect_err("zero retry attempts must be rejected"); + assert!(error.to_string().contains("retry_max_attempts")); +} + +#[test] +fn otlp_push_payload_rejects_invalid_retry_jitter_ratio() { + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + retry_jitter_ratio: 1.5, + ..OtlpHttpPushOptions::default() + }; + let error = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:4318/v1/metrics", + &options, + ) + .expect_err("invalid jitter ratio must fail"); + assert!(error.to_string().contains("retry_jitter_ratio")); +} + +#[test] +fn otlp_push_payload_rejects_invalid_adaptive_retry_ewma_alpha() { + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + adaptive_retry: true, + adaptive_retry_mode: OtlpAdaptiveRetryMode::Ewma, + adaptive_retry_ewma_alpha: 1.5, + ..OtlpHttpPushOptions::default() + }; + let error = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:4318/v1/metrics", + &options, + ) + .expect_err("invalid adaptive ewma alpha must fail"); + assert!(error.to_string().contains("adaptive_retry_ewma_alpha")); +} + +#[test] +fn otlp_push_payload_rejects_zero_half_open_probes_when_breaker_enabled() { + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + circuit_breaker_failure_threshold: 1, + circuit_breaker_open_ms: 1_000, + circuit_breaker_half_open_probes: 0, + ..OtlpHttpPushOptions::default() + }; + let error = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:4318/v1/metrics", + &options, + ) + .expect_err("zero half-open probes must fail"); + assert!(error + .to_string() + .contains("circuit_breaker_half_open_probes")); +} + +#[test] +fn 
otlp_push_payload_rejects_conflicting_breaker_state_backends() { + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + circuit_breaker_state_path: Some("/tmp/otlp-breaker-state.json".to_string()), + circuit_breaker_state_url: Some("http://127.0.0.1:4318/state".to_string()), + ..OtlpHttpPushOptions::default() + }; + let error = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:4318/v1/metrics", + &options, + ) + .expect_err("conflicting state backend options must fail"); + assert!(error + .to_string() + .contains("circuit_breaker_state_path and circuit_breaker_state_url")); +} + +#[test] +fn otlp_push_payload_rejects_state_cas_without_url() { + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + circuit_breaker_state_cas: true, + ..OtlpHttpPushOptions::default() + }; + let error = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:4318/v1/metrics", + &options, + ) + .expect_err("state cas without url must fail"); + assert!(error + .to_string() + .contains("circuit_breaker_state_cas requires circuit_breaker_state_url")); +} + +#[test] +fn otlp_push_payload_rejects_state_patch_without_url() { + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + circuit_breaker_state_patch: true, + ..OtlpHttpPushOptions::default() + }; + let error = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:4318/v1/metrics", + &options, + ) + .expect_err("state patch without url must fail"); + assert!(error + .to_string() + .contains("circuit_breaker_state_patch requires circuit_breaker_state_url")); +} + +#[test] +fn otlp_push_payload_rejects_state_patch_batch_without_patch() { + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + circuit_breaker_state_url: Some("http://127.0.0.1:4318/state".to_string()), + circuit_breaker_state_patch_batch: true, + ..OtlpHttpPushOptions::default() + }; + let error = push_replication_metrics_otel_json_payload_with_options( + 
"{}", + "http://127.0.0.1:4318/v1/metrics", + &options, + ) + .expect_err("state patch batch without patch mode must fail"); + assert!(error + .to_string() + .contains("circuit_breaker_state_patch_batch requires circuit_breaker_state_patch")); +} + +#[test] +fn otlp_push_payload_rejects_state_patch_batch_max_keys_zero() { + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + circuit_breaker_state_url: Some("http://127.0.0.1:4318/state".to_string()), + circuit_breaker_state_patch: true, + circuit_breaker_state_patch_batch_max_keys: 0, + ..OtlpHttpPushOptions::default() + }; + let error = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:4318/v1/metrics", + &options, + ) + .expect_err("state patch batch max keys zero must fail"); + assert!(error + .to_string() + .contains("circuit_breaker_state_patch_batch_max_keys")); +} + +#[test] +fn otlp_push_payload_rejects_state_patch_retry_max_attempts_zero() { + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + circuit_breaker_state_url: Some("http://127.0.0.1:4318/state".to_string()), + circuit_breaker_state_patch: true, + circuit_breaker_state_patch_retry_max_attempts: 0, + ..OtlpHttpPushOptions::default() + }; + let error = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:4318/v1/metrics", + &options, + ) + .expect_err("state patch retry max attempts zero must fail"); + assert!(error + .to_string() + .contains("circuit_breaker_state_patch_retry_max_attempts")); +} + +#[test] +fn otlp_push_payload_rejects_state_patch_merge_without_patch() { + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + circuit_breaker_state_url: Some("http://127.0.0.1:4318/state".to_string()), + circuit_breaker_state_patch_merge: true, + ..OtlpHttpPushOptions::default() + }; + let error = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:4318/v1/metrics", + &options, + ) + .expect_err("state patch merge without patch mode 
must fail"); + assert!(error + .to_string() + .contains("circuit_breaker_state_patch_merge requires circuit_breaker_state_patch")); +} + +#[test] +fn otlp_push_payload_rejects_state_patch_merge_max_keys_zero() { + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + circuit_breaker_state_url: Some("http://127.0.0.1:4318/state".to_string()), + circuit_breaker_state_patch: true, + circuit_breaker_state_patch_merge: true, + circuit_breaker_state_patch_merge_max_keys: 0, + ..OtlpHttpPushOptions::default() + }; + let error = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:4318/v1/metrics", + &options, + ) + .expect_err("state patch merge max keys zero must fail"); + assert!(error + .to_string() + .contains("circuit_breaker_state_patch_merge_max_keys")); +} + +#[test] +fn otlp_push_payload_rejects_state_lease_without_url() { + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + circuit_breaker_state_lease_id: Some("lease-a".to_string()), + ..OtlpHttpPushOptions::default() + }; + let error = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:4318/v1/metrics", + &options, + ) + .expect_err("state lease without url must fail"); + assert!(error + .to_string() + .contains("circuit_breaker_state_lease_id requires circuit_breaker_state_url")); +} + +#[test] +fn otlp_push_payload_circuit_breaker_opens_after_failure() { + let probe = TcpListener::bind("127.0.0.1:0").expect("bind probe"); + let port = probe.local_addr().expect("probe addr").port(); + drop(probe); + let endpoint = format!("http://127.0.0.1:{port}/v1/metrics"); + let options = OtlpHttpPushOptions { + timeout_ms: 100, + retry_max_attempts: 1, + circuit_breaker_failure_threshold: 1, + circuit_breaker_open_ms: 50, + ..OtlpHttpPushOptions::default() + }; + + let first = push_replication_metrics_otel_json_payload_with_options("{}", &endpoint, &options) + .expect_err("first call should fail transport"); + assert!( + 
first.to_string().contains("transport"), + "unexpected first error: {first}" + ); + + let second = push_replication_metrics_otel_json_payload_with_options("{}", &endpoint, &options) + .expect_err("second call should be blocked by circuit breaker"); + assert!( + second.to_string().contains("circuit breaker open"), + "unexpected second error: {second}" + ); + + thread::sleep(Duration::from_millis(70)); + let third = push_replication_metrics_otel_json_payload_with_options("{}", &endpoint, &options) + .expect_err("third call should attempt again after breaker window"); + assert!( + !third.to_string().contains("circuit breaker open"), + "breaker should have closed, got: {third}" + ); +} + +#[test] +fn otlp_push_payload_half_open_probes_gate_recovery() { + let payload = "{\"resourceMetrics\":[]}"; + let (endpoint, captured_rx, handle) = + spawn_http_sequence_capture_server(vec![500, 200, 200, 500], "ok"); + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + retry_max_attempts: 1, + retry_backoff_ms: 1, + retry_backoff_max_ms: 1, + circuit_breaker_failure_threshold: 1, + circuit_breaker_open_ms: 50, + circuit_breaker_half_open_probes: 2, + ..OtlpHttpPushOptions::default() + }; + + let first = push_replication_metrics_otel_json_payload_with_options(payload, &endpoint, &options) + .expect_err("first call should open breaker"); + assert!(first.to_string().contains("status 500")); + + let second = + push_replication_metrics_otel_json_payload_with_options(payload, &endpoint, &options) + .expect_err("breaker should block while open"); + assert!(second.to_string().contains("circuit breaker open")); + + thread::sleep(Duration::from_millis(70)); + let third = push_replication_metrics_otel_json_payload_with_options(payload, &endpoint, &options) + .expect("first half-open probe should pass"); + assert_eq!(third.status_code, 200); + + let fourth = + push_replication_metrics_otel_json_payload_with_options(payload, &endpoint, &options) + .expect("second half-open probe should 
pass"); + assert_eq!(fourth.status_code, 200); + + let fifth = push_replication_metrics_otel_json_payload_with_options(payload, &endpoint, &options) + .expect_err("fifth call should hit configured server failure"); + assert!( + !fifth.to_string().contains("circuit breaker open"), + "expected call to be attempted after successful probes, got: {fifth}" + ); + + let captures = captured_rx + .recv_timeout(Duration::from_secs(2)) + .expect("captured half-open requests"); + assert_eq!( + captures.len(), + 4, + "blocked open-window call should not hit endpoint" + ); + handle.join().expect("half-open sequence server thread"); +} + +#[test] +fn otlp_push_payload_uses_persisted_shared_circuit_breaker_state() { + let dir = tempfile::tempdir().expect("tempdir"); + let state_path = dir.path().join("otlp-breaker-state.json"); + let now_ms = SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64; + let state_json = serde_json::json!({ + "shared-breaker": { + "consecutive_failures": 0, + "open_until_ms": now_ms + 5_000 + } + }); + std::fs::write( + &state_path, + serde_json::to_vec(&state_json).expect("serialize state"), + ) + .expect("write state"); + + let options = OtlpHttpPushOptions { + timeout_ms: 100, + retry_max_attempts: 1, + circuit_breaker_failure_threshold: 1, + circuit_breaker_open_ms: 500, + circuit_breaker_state_path: Some(state_path.to_string_lossy().to_string()), + circuit_breaker_scope_key: Some("shared-breaker".to_string()), + ..OtlpHttpPushOptions::default() + }; + + let error = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:9/v1/metrics", + &options, + ) + .expect_err("persisted open breaker should block request"); + assert!( + error.to_string().contains("circuit breaker open"), + "unexpected error: {error}" + ); +} + +#[test] +fn otlp_push_payload_uses_shared_circuit_breaker_state_url() { + let now_ms = SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + 
.unwrap_or_default() + .as_millis() as u64; + let state_json = serde_json::json!({ + "shared-breaker-url": { + "consecutive_failures": 0, + "open_until_ms": now_ms + 5_000 + } + }) + .to_string(); + let (state_url, state_handle) = spawn_state_store_get_server(state_json); + + let options = OtlpHttpPushOptions { + timeout_ms: 200, + retry_max_attempts: 1, + circuit_breaker_failure_threshold: 1, + circuit_breaker_open_ms: 500, + circuit_breaker_state_url: Some(state_url), + circuit_breaker_scope_key: Some("shared-breaker-url".to_string()), + ..OtlpHttpPushOptions::default() + }; + + let error = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:9/v1/metrics", + &options, + ) + .expect_err("remote shared open breaker should block request"); + assert!( + error.to_string().contains("circuit breaker open"), + "unexpected error: {error}" + ); + state_handle.join().expect("state store thread"); +} + +#[test] +fn otlp_push_payload_shared_state_url_roundtrips_failure_open_state() { + let (state_url, state_handle) = spawn_state_store_roundtrip_server(); + let options = OtlpHttpPushOptions { + timeout_ms: 200, + retry_max_attempts: 1, + circuit_breaker_failure_threshold: 1, + circuit_breaker_open_ms: 2_000, + circuit_breaker_state_url: Some(state_url), + circuit_breaker_scope_key: Some("shared-roundtrip-breaker".to_string()), + ..OtlpHttpPushOptions::default() + }; + + let first = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:9/v1/metrics", + &options, + ) + .expect_err("first call should fail transport and persist open state"); + assert!( + first.to_string().contains("transport"), + "unexpected first error: {first}" + ); + + let second = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:9/v1/metrics", + &options, + ) + .expect_err("second call should be blocked by state loaded from shared url"); + assert!( + second.to_string().contains("circuit breaker open"), + 
"unexpected second error: {second}" + ); + + state_handle.join().expect("state store roundtrip thread"); +} + +#[test] +fn otlp_push_payload_shared_state_url_applies_cas_and_lease_headers() { + let (state_url, state_handle) = spawn_state_store_cas_lease_server("lease-cas-a".to_string()); + let options = OtlpHttpPushOptions { + timeout_ms: 200, + retry_max_attempts: 1, + circuit_breaker_failure_threshold: 1, + circuit_breaker_open_ms: 2_000, + circuit_breaker_state_url: Some(state_url), + circuit_breaker_state_cas: true, + circuit_breaker_state_lease_id: Some("lease-cas-a".to_string()), + circuit_breaker_scope_key: Some("shared-cas-breaker".to_string()), + ..OtlpHttpPushOptions::default() + }; + + let first = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:9/v1/metrics", + &options, + ) + .expect_err("first call should fail transport and persist with CAS"); + assert!( + first.to_string().contains("transport"), + "unexpected first error: {first}" + ); + + state_handle.join().expect("state store cas lease thread"); +} + +#[test] +fn otlp_push_payload_shared_state_url_patch_protocol_uses_key_scoped_updates() { + let scope_key = "shared-patch-breaker"; + let (state_url, state_handle) = spawn_state_store_patch_server(scope_key.to_string()); + let options = OtlpHttpPushOptions { + timeout_ms: 200, + retry_max_attempts: 1, + circuit_breaker_failure_threshold: 1, + circuit_breaker_open_ms: 2_000, + circuit_breaker_state_url: Some(state_url), + circuit_breaker_state_patch: true, + circuit_breaker_scope_key: Some(scope_key.to_string()), + ..OtlpHttpPushOptions::default() + }; + + let first = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:9/v1/metrics", + &options, + ) + .expect_err("first call should fail transport and persist key-scoped patch"); + assert!( + first.to_string().contains("transport"), + "unexpected first error: {first}" + ); + + state_handle.join().expect("state store patch thread"); +} 
+ +#[test] +fn otlp_push_payload_shared_state_url_patch_protocol_retries_on_precondition_failure() { + let scope_key = "shared-patch-retry-breaker"; + let (state_url, state_handle) = spawn_state_store_patch_retry_server(scope_key.to_string()); + let options = OtlpHttpPushOptions { + timeout_ms: 200, + retry_max_attempts: 1, + circuit_breaker_failure_threshold: 1, + circuit_breaker_open_ms: 2_000, + circuit_breaker_state_url: Some(state_url), + circuit_breaker_state_patch: true, + circuit_breaker_state_cas: true, + circuit_breaker_state_patch_retry_max_attempts: 2, + circuit_breaker_scope_key: Some(scope_key.to_string()), + ..OtlpHttpPushOptions::default() + }; + + let first = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:9/v1/metrics", + &options, + ) + .expect_err("first call should fail transport and persist key-scoped patch with retry"); + assert!( + first.to_string().contains("transport"), + "unexpected first error: {first}" + ); + + state_handle.join().expect("state store patch retry thread"); +} + +#[test] +fn otlp_push_payload_shared_state_url_patch_batch_protocol_uses_multi_key_updates() { + let scope_key = "shared-patch-batch-breaker"; + let (state_url, state_handle) = spawn_state_store_patch_batch_server(scope_key.to_string()); + let options = OtlpHttpPushOptions { + timeout_ms: 200, + retry_max_attempts: 1, + circuit_breaker_failure_threshold: 1, + circuit_breaker_open_ms: 2_000, + circuit_breaker_state_url: Some(state_url), + circuit_breaker_state_patch: true, + circuit_breaker_state_patch_batch: true, + circuit_breaker_state_patch_batch_max_keys: 2, + circuit_breaker_scope_key: Some(scope_key.to_string()), + ..OtlpHttpPushOptions::default() + }; + + let first = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:9/v1/metrics", + &options, + ) + .expect_err("first call should fail transport and persist patch batch update"); + assert!( + first.to_string().contains("transport"), + 
"unexpected first error: {first}" + ); + + state_handle.join().expect("state store patch batch thread"); +} + +#[test] +fn otlp_push_payload_shared_state_url_patch_merge_protocol_compacts_high_cardinality_keys() { + let key_a = "shared-patch-merge-breaker-a"; + let key_b = "shared-patch-merge-breaker-b"; + let key_c = "shared-patch-merge-breaker-c"; + let (state_url, state_handle) = spawn_state_store_patch_merge_server(vec![ + (key_a.to_string(), "patch-v1".to_string()), + (key_b.to_string(), "patch-v1".to_string()), + (key_c.to_string(), "patch-merge-v1".to_string()), + ]); + + let base_options = OtlpHttpPushOptions { + timeout_ms: 200, + retry_max_attempts: 1, + circuit_breaker_failure_threshold: 1, + circuit_breaker_open_ms: 2_000, + circuit_breaker_state_url: Some(state_url), + circuit_breaker_state_patch: true, + circuit_breaker_state_patch_merge_max_keys: 2, + ..OtlpHttpPushOptions::default() + }; + + for scope_key in [key_a, key_b] { + let mut options = base_options.clone(); + options.circuit_breaker_scope_key = Some(scope_key.to_string()); + let first = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:9/v1/metrics", + &options, + ) + .expect_err("preload call should fail transport and persist key-scoped patch"); + assert!( + first.to_string().contains("transport"), + "unexpected preload error: {first}" + ); + } + + let mut merge_options = base_options.clone(); + merge_options.circuit_breaker_scope_key = Some(key_c.to_string()); + merge_options.circuit_breaker_state_patch_merge = true; + let merged = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:9/v1/metrics", + &merge_options, + ) + .expect_err("merge call should fail transport and persist compacted merge patch"); + assert!( + merged.to_string().contains("transport"), + "unexpected merge error: {merged}" + ); + + state_handle.join().expect("state store patch merge thread"); +} + +#[test] +fn 
otlp_push_payload_shared_state_url_patch_merge_protocol_retries_on_precondition_failure() { + let scope_key = "shared-patch-merge-retry-breaker"; + let (state_url, state_handle) = spawn_state_store_patch_merge_retry_server(scope_key.to_string()); + let options = OtlpHttpPushOptions { + timeout_ms: 200, + retry_max_attempts: 1, + circuit_breaker_failure_threshold: 1, + circuit_breaker_open_ms: 2_000, + circuit_breaker_state_url: Some(state_url), + circuit_breaker_state_patch: true, + circuit_breaker_state_patch_merge: true, + circuit_breaker_state_patch_retry_max_attempts: 2, + circuit_breaker_state_cas: true, + circuit_breaker_scope_key: Some(scope_key.to_string()), + ..OtlpHttpPushOptions::default() + }; + + let first = push_replication_metrics_otel_json_payload_with_options( + "{}", + "http://127.0.0.1:9/v1/metrics", + &options, + ) + .expect_err("first call should fail transport and persist merge patch with retry"); + assert!( + first.to_string().contains("transport"), + "unexpected first error: {first}" + ); + + state_handle + .join() + .expect("state store patch merge retry thread"); +} + +#[test] +fn otlp_push_payload_adaptive_retry_uses_failure_history() { + let dir = tempfile::tempdir().expect("tempdir"); + let state_path = dir.path().join("otlp-adaptive-state.json"); + let state_json = serde_json::json!({ + "adaptive-breaker": { + "consecutive_failures": 4, + "open_until_ms": 0 + } + }); + std::fs::write( + &state_path, + serde_json::to_vec(&state_json).expect("serialize adaptive state"), + ) + .expect("write adaptive state"); + + let payload = "{\"resourceMetrics\":[]}"; + let (endpoint, captured_rx, handle) = spawn_http_sequence_capture_server(vec![500, 200], "ok"); + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + retry_max_attempts: 2, + retry_backoff_ms: 80, + retry_backoff_max_ms: 2_000, + adaptive_retry: true, + retry_jitter_ratio: 0.0, + circuit_breaker_failure_threshold: 2, + circuit_breaker_open_ms: 2_000, + circuit_breaker_state_path: 
Some(state_path.to_string_lossy().to_string()), + circuit_breaker_scope_key: Some("adaptive-breaker".to_string()), + ..OtlpHttpPushOptions::default() + }; + + let start = Instant::now(); + let result = + push_replication_metrics_otel_json_payload_with_options(payload, &endpoint, &options) + .expect("adaptive retry second attempt should succeed"); + let elapsed = start.elapsed(); + assert_eq!(result.status_code, 200); + assert!( + elapsed >= Duration::from_millis(250), + "adaptive retry backoff too small: {:?}", + elapsed + ); + + let captures = captured_rx + .recv_timeout(Duration::from_secs(2)) + .expect("captured adaptive requests"); + assert_eq!(captures.len(), 2); + handle.join().expect("adaptive sequence thread"); +} + +#[test] +fn otlp_push_payload_adaptive_retry_ewma_mode_uses_error_score() { + let dir = tempfile::tempdir().expect("tempdir"); + let state_path = dir.path().join("otlp-adaptive-ewma-state.json"); + let state_json = serde_json::json!({ + "adaptive-ewma-breaker": { + "consecutive_failures": 0, + "open_until_ms": 0, + "ewma_error_score": 0.75 + } + }); + std::fs::write( + &state_path, + serde_json::to_vec(&state_json).expect("serialize adaptive ewma state"), + ) + .expect("write adaptive ewma state"); + + let payload = "{\"resourceMetrics\":[]}"; + let (endpoint, captured_rx, handle) = spawn_http_sequence_capture_server(vec![500, 200], "ok"); + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + retry_max_attempts: 2, + retry_backoff_ms: 80, + retry_backoff_max_ms: 2_000, + retry_jitter_ratio: 0.0, + adaptive_retry: true, + adaptive_retry_mode: OtlpAdaptiveRetryMode::Ewma, + adaptive_retry_ewma_alpha: 0.5, + circuit_breaker_failure_threshold: 2, + circuit_breaker_open_ms: 2_000, + circuit_breaker_state_path: Some(state_path.to_string_lossy().to_string()), + circuit_breaker_scope_key: Some("adaptive-ewma-breaker".to_string()), + ..OtlpHttpPushOptions::default() + }; + + let start = Instant::now(); + let result = + 
push_replication_metrics_otel_json_payload_with_options(payload, &endpoint, &options) + .expect("adaptive ewma retry second attempt should succeed"); + let elapsed = start.elapsed(); + assert_eq!(result.status_code, 200); + assert!( + elapsed >= Duration::from_millis(450), + "adaptive ewma retry backoff too small: {:?}", + elapsed + ); + + let captures = captured_rx + .recv_timeout(Duration::from_secs(2)) + .expect("captured adaptive ewma requests"); + assert_eq!(captures.len(), 2); + handle.join().expect("adaptive ewma sequence thread"); +} + +#[test] +fn otlp_push_grpc_payload_posts_request_and_auth_header() { + let payload = OtelExportMetricsServiceRequest { + resource_metrics: Vec::new(), + } + .encode_to_vec(); + let (endpoint, captured_rx, shutdown_tx, handle) = spawn_grpc_capture_server(0); + thread::sleep(Duration::from_millis(50)); + + let result = + push_replication_metrics_otel_grpc_payload(&payload, &endpoint, 2_000, Some("token")) + .expect("otlp grpc push must succeed"); + assert_eq!(result.status_code, 200); + + let captured = captured_rx + .recv_timeout(Duration::from_secs(2)) + .expect("captured grpc request"); + assert_eq!(captured.authorization.as_deref(), Some("Bearer token")); + assert_eq!(captured.resource_metrics_count, 0); + assert_eq!(captured.attempt, 1); + + let _ = shutdown_tx.send(()); + handle.join().expect("grpc server thread"); +} + +#[test] +fn otlp_push_grpc_payload_retries_unavailable_once() { + let payload = OtelExportMetricsServiceRequest { + resource_metrics: Vec::new(), + } + .encode_to_vec(); + let (endpoint, captured_rx, shutdown_tx, handle) = spawn_grpc_capture_server(1); + thread::sleep(Duration::from_millis(50)); + let options = OtlpHttpPushOptions { + timeout_ms: 2_000, + retry_max_attempts: 2, + retry_backoff_ms: 1, + retry_backoff_max_ms: 1, + ..OtlpHttpPushOptions::default() + }; + + let result = + push_replication_metrics_otel_grpc_payload_with_options(&payload, &endpoint, &options) + .expect("second grpc attempt 
should succeed"); + assert_eq!(result.status_code, 200); + + let captured = captured_rx + .recv_timeout(Duration::from_secs(2)) + .expect("captured grpc retry request"); + assert_eq!(captured.attempt, 2); + + let _ = shutdown_tx.send(()); + handle.join().expect("grpc server thread"); +} + +#[test] +fn otlp_push_grpc_payload_rejects_invalid_protobuf() { + let error = push_replication_metrics_otel_grpc_payload( + &[0xff, 0x00, 0x12], + "http://127.0.0.1:4317", + 2_000, + None, + ) + .expect_err("invalid protobuf payload must fail"); + assert!(error.to_string().contains("Invalid OTLP protobuf payload")); +} diff --git a/ray-rs/tests/replication_phase_a.rs b/ray-rs/tests/replication_phase_a.rs new file mode 100644 index 0000000..56fd883 --- /dev/null +++ b/ray-rs/tests/replication_phase_a.rs @@ -0,0 +1,205 @@ +use std::str::FromStr; + +use kitedb::replication::log_store::{ReplicationFrame, SegmentLogStore}; +use kitedb::replication::manifest::{ManifestStore, ReplicationManifest, SegmentMeta}; +use kitedb::replication::types::{CommitToken, ReplicationCursor}; + +#[test] +fn commit_token_invalid_strings_rejected() { + let invalid = [ + "", "1", "1:", "1:2:3", "x:1", "1:y", "-1:2", "1:-2", " 1:2", "1:2 ", + ]; + + for raw in invalid { + assert!( + CommitToken::from_str(raw).is_err(), + "token should fail: {raw}" + ); + } +} + +#[test] +fn replication_cursor_invalid_strings_rejected() { + let invalid = [ + "", + "1:2:3", + "1:2:3:4:5", + "x:2:3:4", + "1:y:3:4", + "1:2:z:4", + "1:2:3:w", + "-1:2:3:4", + "1:2:-3:4", + "1:2:3:-4", + "1:2:3:4 ", + ]; + + for raw in invalid { + assert!( + ReplicationCursor::from_str(raw).is_err(), + "cursor should fail: {raw}" + ); + } +} + +#[test] +fn token_cursor_ordering_epoch_aware_and_monotonic() { + let t1 = CommitToken::new(1, 41); + let t2 = CommitToken::new(1, 42); + let t3 = CommitToken::new(2, 1); + assert!(t1 < t2); + assert!(t2 < t3); + + let c1 = ReplicationCursor::new(1, 1, 100, 10); + let c2 = ReplicationCursor::new(1, 1, 101, 
10); + let c3 = ReplicationCursor::new(1, 2, 0, 11); + let c4 = ReplicationCursor::new(2, 0, 0, 0); + assert!(c1 < c2); + assert!(c2 < c3); + assert!(c3 < c4); +} + +#[test] +fn token_cursor_roundtrip_property() { + for epoch in [0_u64, 1, 7, 1024, u16::MAX as u64] { + for log_index in [0_u64, 1, 2, 99, 65_535] { + let token = CommitToken::new(epoch, log_index); + let parsed = CommitToken::from_str(&token.to_string()).expect("parse token"); + assert_eq!(parsed, token); + + let cursor = ReplicationCursor::new(epoch, epoch + 1, log_index + 2, log_index); + let parsed = ReplicationCursor::from_str(&cursor.to_string()).expect("parse cursor"); + assert_eq!(parsed, cursor); + } + } +} + +#[test] +fn manifest_interrupted_write_never_yields_partial_valid_state() { + let dir = tempfile::tempdir().expect("tempdir"); + let manifest_path = dir.path().join("replication-manifest.json"); + let store = ManifestStore::new(&manifest_path); + + let baseline = ReplicationManifest { + version: 1, + epoch: 3, + head_log_index: 41, + retained_floor: 7, + active_segment_id: 9, + segments: vec![SegmentMeta { + id: 9, + start_log_index: 1, + end_log_index: 41, + size_bytes: 2048, + }], + }; + store.write(&baseline).expect("write baseline"); + + let interrupted_tmp_path = manifest_path.with_extension("json.tmp"); + std::fs::write(&interrupted_tmp_path, b"{\"version\":1,\"epoch\":99") + .expect("write interrupted temp"); + + let loaded = store.read().expect("load manifest"); + assert_eq!(loaded, baseline); +} + +#[test] +fn manifest_reload_after_rewrite_is_deterministic() { + let dir = tempfile::tempdir().expect("tempdir"); + let manifest_path = dir.path().join("replication-manifest.json"); + let store = ManifestStore::new(&manifest_path); + + let first = ReplicationManifest { + version: 1, + epoch: 2, + head_log_index: 10, + retained_floor: 1, + active_segment_id: 1, + segments: vec![SegmentMeta { + id: 1, + start_log_index: 1, + end_log_index: 10, + size_bytes: 123, + }], + }; + let second 
= ReplicationManifest { + version: 1, + epoch: 2, + head_log_index: 11, + retained_floor: 1, + active_segment_id: 2, + segments: vec![ + SegmentMeta { + id: 1, + start_log_index: 1, + end_log_index: 10, + size_bytes: 123, + }, + SegmentMeta { + id: 2, + start_log_index: 11, + end_log_index: 11, + size_bytes: 64, + }, + ], + }; + + store.write(&first).expect("write first"); + assert_eq!(store.read().expect("read first"), first); + + store.write(&second).expect("write second"); + assert_eq!(store.read().expect("read second"), second); + + let reopened = ManifestStore::new(&manifest_path); + assert_eq!(reopened.read().expect("read reopened"), second); +} + +#[test] +fn segment_append_read_roundtrip_preserves_boundaries_indices() { + let dir = tempfile::tempdir().expect("tempdir"); + let segment_path = dir.path().join("segment-0001.rlog"); + + let mut writer = SegmentLogStore::create(&segment_path).expect("create segment"); + writer + .append(&ReplicationFrame::new(1, 1, b"alpha".to_vec())) + .expect("append 1"); + writer + .append(&ReplicationFrame::new(1, 2, vec![0, 1, 2, 3])) + .expect("append 2"); + writer + .append(&ReplicationFrame::new(1, 3, b"omega".to_vec())) + .expect("append 3"); + writer.sync().expect("sync"); + + let reader = SegmentLogStore::open(&segment_path).expect("open reader"); + let frames = reader.read_all().expect("read all"); + + assert_eq!(frames.len(), 3); + assert_eq!(frames[0].epoch, 1); + assert_eq!(frames[0].log_index, 1); + assert_eq!(frames[0].payload, b"alpha"); + assert_eq!(frames[1].log_index, 2); + assert_eq!(frames[1].payload, vec![0, 1, 2, 3]); + assert_eq!(frames[2].log_index, 3); + assert_eq!(frames[2].payload, b"omega"); +} + +#[test] +fn corrupt_segment_frame_checksum_fails_scan() { + let dir = tempfile::tempdir().expect("tempdir"); + let segment_path = dir.path().join("segment-0002.rlog"); + + let mut writer = SegmentLogStore::create(&segment_path).expect("create segment"); + writer + .append(&ReplicationFrame::new(4, 99, 
b"payload".to_vec())) + .expect("append"); + writer.sync().expect("sync"); + + let mut bytes = std::fs::read(&segment_path).expect("read bytes"); + let last = bytes.len() - 1; + bytes[last] ^= 0xFF; + std::fs::write(&segment_path, &bytes).expect("corrupt bytes"); + + let reader = SegmentLogStore::open(&segment_path).expect("open reader"); + assert!(reader.read_all().is_err(), "checksum mismatch must error"); +} diff --git a/ray-rs/tests/replication_phase_b.rs b/ray-rs/tests/replication_phase_b.rs new file mode 100644 index 0000000..e936785 --- /dev/null +++ b/ray-rs/tests/replication_phase_b.rs @@ -0,0 +1,231 @@ +use std::collections::HashSet; +use std::env; +use std::sync::{Arc, Barrier}; + +use kitedb::core::single_file::{close_single_file, open_single_file, SingleFileOpenOptions}; +use kitedb::replication::primary::default_replication_sidecar_path; +use kitedb::replication::types::CommitToken; +use kitedb::replication::types::ReplicationRole; + +const CRASH_BOUNDARY_CHILD_ENV: &str = "RAYDB_CRASH_BOUNDARY_CHILD"; +const CRASH_BOUNDARY_DB_PATH_ENV: &str = "RAYDB_CRASH_BOUNDARY_DB_PATH"; +const CRASH_BOUNDARY_TOKEN_PATH_ENV: &str = "RAYDB_CRASH_BOUNDARY_TOKEN_PATH"; + +#[test] +fn crash_boundary_child_process_helper() { + if env::var_os(CRASH_BOUNDARY_CHILD_ENV).is_none() { + return; + } + + let db_path = + std::path::PathBuf::from(env::var(CRASH_BOUNDARY_DB_PATH_ENV).expect("child db path env")); + let token_path = std::path::PathBuf::from( + env::var(CRASH_BOUNDARY_TOKEN_PATH_ENV).expect("child token path env"), + ); + + let primary = open_single_file( + &db_path, + SingleFileOpenOptions::new().replication_role(ReplicationRole::Primary), + ) + .expect("open child primary"); + primary.begin(false).expect("begin child tx"); + primary + .create_node(Some("crash-boundary")) + .expect("create crash-boundary node"); + let token = primary + .commit_with_token() + .expect("commit child tx") + .expect("commit token"); + std::fs::write(&token_path, 
token.to_string()).expect("persist emitted token"); + std::process::abort(); +} + +#[test] +fn commit_returns_monotonic_token_on_primary() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("phase-b-primary.kitedb"); + + let db = open_single_file( + &db_path, + SingleFileOpenOptions::new().replication_role(ReplicationRole::Primary), + ) + .expect("open db"); + + let mut seen = Vec::new(); + for i in 0..4 { + db.begin(false).expect("begin"); + db.create_node(Some(&format!("n-{i}"))) + .expect("create node"); + let token = db + .commit_with_token() + .expect("commit") + .expect("primary token"); + seen.push(token); + } + + assert!(seen.windows(2).all(|window| window[0] < window[1])); + + let status = db.primary_replication_status().expect("replication status"); + assert_eq!(status.head_log_index, 4); + assert_eq!(status.last_token, seen.last().copied()); + + close_single_file(db).expect("close db"); +} + +#[test] +fn replication_disabled_mode_has_no_sidecar_activity() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("phase-b-disabled.kitedb"); + + let db = open_single_file(&db_path, SingleFileOpenOptions::new()).expect("open db"); + db.begin(false).expect("begin"); + db.create_node(Some("plain")).expect("create node"); + let token = db.commit_with_token().expect("commit"); + assert!(token.is_none()); + + close_single_file(db).expect("close db"); + + let default_sidecar = default_replication_sidecar_path(&db_path); + assert!( + !default_sidecar.exists(), + "disabled mode must not create sidecar: {}", + default_sidecar.display() + ); +} + +#[test] +fn sidecar_append_failure_causes_commit_failure_without_token() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("phase-b-failure.kitedb"); + + let db = open_single_file( + &db_path, + SingleFileOpenOptions::new() + .replication_role(ReplicationRole::Primary) + .replication_fail_after_append_for_testing(0), + ) + 
.expect("open db"); + + db.begin(false).expect("begin"); + db.create_node(Some("boom")).expect("create node"); + let err = db.commit_with_token().expect_err("commit should fail"); + assert!( + err.to_string().contains("replication append"), + "unexpected error: {err}" + ); + + let status = db.primary_replication_status().expect("status"); + assert_eq!(status.head_log_index, 0); + assert_eq!(status.append_failures, 1); + assert!(db.last_commit_token().is_none()); + + close_single_file(db).expect("close db"); +} + +#[test] +fn concurrent_writers_have_contiguous_token_order() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("phase-b-concurrent.kitedb"); + + let db = Arc::new( + open_single_file( + &db_path, + SingleFileOpenOptions::new().replication_role(ReplicationRole::Primary), + ) + .expect("open db"), + ); + + let threads = 8usize; + let barrier = Arc::new(Barrier::new(threads)); + let mut handles = Vec::with_capacity(threads); + + for i in 0..threads { + let db = Arc::clone(&db); + let barrier = Arc::clone(&barrier); + handles.push(std::thread::spawn(move || { + barrier.wait(); + db.begin(false).expect("begin"); + db.create_node(Some(&format!("t-{i}"))).expect("create"); + db.commit_with_token() + .expect("commit") + .expect("primary token") + })); + } + + let mut tokens = Vec::new(); + for handle in handles { + tokens.push(handle.join().expect("join")); + } + + let mut indices: Vec<u64> = tokens.iter().map(|token| token.log_index).collect(); + indices.sort_unstable(); + assert_eq!(indices, (1_u64..=threads as u64).collect::<Vec<u64>>()); + + let unique: HashSet<u64> = tokens.iter().map(|token| token.log_index).collect(); + assert_eq!(unique.len(), threads); + + let status = db.primary_replication_status().expect("status"); + assert_eq!(status.head_log_index, threads as u64); + + let db = Arc::into_inner(db).expect("sole owner"); + close_single_file(db).expect("close db"); +} + +#[test] +fn 
crash_after_commit_token_return_keeps_token_durable_on_reopen() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("phase-b-crash-boundary.kitedb"); + let token_path = dir.path().join("phase-b-crash-boundary.token"); + + let status = std::process::Command::new(std::env::current_exe().expect("current test binary")) + .arg("--test-threads=1") + .arg("--exact") + .arg("crash_boundary_child_process_helper") + .arg("--nocapture") + .env(CRASH_BOUNDARY_CHILD_ENV, "1") + .env(CRASH_BOUNDARY_DB_PATH_ENV, db_path.as_os_str()) + .env(CRASH_BOUNDARY_TOKEN_PATH_ENV, token_path.as_os_str()) + .status() + .expect("spawn crash-boundary child"); + assert!( + !status.success(), + "child helper should crash to emulate abrupt process termination" + ); + + let token_raw = std::fs::read_to_string(&token_path).expect("read emitted token"); + let emitted_token = token_raw.parse::<CommitToken>().expect("parse token"); + + let reopened = open_single_file( + &db_path, + SingleFileOpenOptions::new().replication_role(ReplicationRole::Primary), + ) + .expect("reopen primary after crash"); + let status = reopened + .primary_replication_status() + .expect("primary status"); + assert!( + status.head_log_index >= emitted_token.log_index, + "reopened head must include emitted token boundary: emitted={} reopened={}", + emitted_token.log_index, + status.head_log_index + ); + let exported = reopened + .primary_export_log_transport_json(None, 32, 1024 * 1024, false) + .expect("export log after crash reopen"); + let exported_json: serde_json::Value = serde_json::from_str(&exported).expect("parse export"); + let exported_has_token = exported_json["frames"] + .as_array() + .expect("frames array") + .iter() + .any(|frame| { + frame["epoch"].as_u64() == Some(emitted_token.epoch) + && frame["log_index"].as_u64() == Some(emitted_token.log_index) + }); + assert!( + exported_has_token, + "persisted log export must include emitted token {}:{}", + emitted_token.epoch, emitted_token.log_index 
+ ); + + close_single_file(reopened).expect("close reopened primary"); +} diff --git a/ray-rs/tests/replication_phase_c.rs b/ray-rs/tests/replication_phase_c.rs new file mode 100644 index 0000000..c3d5cd2 --- /dev/null +++ b/ray-rs/tests/replication_phase_c.rs @@ -0,0 +1,326 @@ +use kitedb::core::single_file::{close_single_file, open_single_file, SingleFileOpenOptions}; +use kitedb::replication::primary::default_replication_sidecar_path; +use kitedb::replication::types::ReplicationRole; + +fn open_primary(path: &std::path::Path) -> kitedb::Result { + open_single_file( + path, + SingleFileOpenOptions::new().replication_role(ReplicationRole::Primary), + ) +} + +fn open_replica( + path: &std::path::Path, + primary_path: &std::path::Path, +) -> kitedb::Result { + open_single_file( + path, + SingleFileOpenOptions::new() + .replication_role(ReplicationRole::Replica) + .replication_source_db_path(primary_path), + ) +} + +#[test] +fn replica_bootstrap_from_snapshot_reaches_primary_state() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("primary-bootstrap.kitedb"); + let replica_path = dir.path().join("replica-bootstrap.kitedb"); + + let primary = open_primary(&primary_path).expect("open primary"); + + primary.begin(false).expect("begin"); + let n1 = primary.create_node(Some("n1")).expect("n1"); + let n2 = primary.create_node(Some("n2")).expect("n2"); + primary.add_edge(n1, 1, n2).expect("edge"); + primary.commit_with_token().expect("commit").expect("token"); + + let replica = open_replica(&replica_path, &primary_path).expect("open replica"); + replica + .replica_bootstrap_from_snapshot() + .expect("bootstrap snapshot"); + + assert_eq!(replica.count_nodes(), primary.count_nodes()); + assert_eq!(replica.count_edges(), primary.count_edges()); + for node_id in primary.list_nodes() { + assert!(replica.node_exists(node_id)); + assert_eq!(replica.node_key(node_id), primary.node_key(node_id)); + } + + 
close_single_file(replica).expect("close replica"); + close_single_file(primary).expect("close primary"); +} + +#[test] +fn incremental_catch_up_applies_frames_in_order() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("primary-catch-up.kitedb"); + let replica_path = dir.path().join("replica-catch-up.kitedb"); + + let primary = open_primary(&primary_path).expect("open primary"); + + primary.begin(false).expect("begin"); + primary.create_node(Some("base")).expect("create base"); + let base_token = primary + .commit_with_token() + .expect("commit") + .expect("base token"); + + let replica = open_replica(&replica_path, &primary_path).expect("open replica"); + replica + .replica_bootstrap_from_snapshot() + .expect("bootstrap snapshot"); + let status = replica.replica_replication_status().expect("status"); + assert_eq!(status.applied_log_index, base_token.log_index); + + primary.begin(false).expect("begin c1"); + primary.create_node(Some("c1")).expect("create c1"); + let token1 = primary + .commit_with_token() + .expect("commit c1") + .expect("token c1"); + + primary.begin(false).expect("begin c2"); + primary.create_node(Some("c2")).expect("create c2"); + let token2 = primary + .commit_with_token() + .expect("commit c2") + .expect("token c2"); + + let pulled = replica.replica_catch_up_once(1).expect("pull one"); + assert_eq!(pulled, 1); + let status = replica + .replica_replication_status() + .expect("status after one"); + assert_eq!(status.applied_log_index, token1.log_index); + + let pulled = replica.replica_catch_up_once(8).expect("pull remaining"); + assert_eq!(pulled, 1); + let status = replica + .replica_replication_status() + .expect("status after remaining"); + assert_eq!(status.applied_log_index, token2.log_index); + + assert_eq!(replica.count_nodes(), primary.count_nodes()); + + close_single_file(replica).expect("close replica"); + close_single_file(primary).expect("close primary"); +} + +#[test] +fn 
duplicate_chunk_delivery_is_idempotent() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("primary-duplicate.kitedb"); + let replica_path = dir.path().join("replica-duplicate.kitedb"); + + let primary = open_primary(&primary_path).expect("open primary"); + primary.begin(false).expect("begin"); + primary.create_node(Some("a")).expect("create a"); + primary + .commit_with_token() + .expect("commit a") + .expect("token a"); + + let replica = open_replica(&replica_path, &primary_path).expect("open replica"); + replica + .replica_bootstrap_from_snapshot() + .expect("bootstrap snapshot"); + + primary.begin(false).expect("begin b"); + primary.create_node(Some("b")).expect("create b"); + primary + .commit_with_token() + .expect("commit b") + .expect("token b"); + + replica.replica_catch_up_once(8).expect("initial catch up"); + let node_count_before = replica.count_nodes(); + let status_before = replica.replica_replication_status().expect("status before"); + + let replayed = replica + .replica_catch_up_once_replaying_last_for_testing(1) + .expect("replay last chunk"); + assert_eq!(replayed, 0, "duplicate frame should be ignored"); + + let status_after = replica.replica_replication_status().expect("status after"); + assert_eq!( + status_after.applied_log_index, + status_before.applied_log_index + ); + assert_eq!(replica.count_nodes(), node_count_before); + + close_single_file(replica).expect("close replica"); + close_single_file(primary).expect("close primary"); +} + +#[test] +fn replica_restart_resumes_from_durable_cursor() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("primary-resume.kitedb"); + let replica_path = dir.path().join("replica-resume.kitedb"); + + let primary = open_primary(&primary_path).expect("open primary"); + primary.begin(false).expect("begin base"); + primary.create_node(Some("base")).expect("create base"); + primary + .commit_with_token() + .expect("commit base") + 
.expect("token base"); + + let replica = open_replica(&replica_path, &primary_path).expect("open replica"); + replica + .replica_bootstrap_from_snapshot() + .expect("bootstrap snapshot"); + + primary.begin(false).expect("begin c1"); + primary.create_node(Some("c1")).expect("create c1"); + let t1 = primary + .commit_with_token() + .expect("commit c1") + .expect("token c1"); + + primary.begin(false).expect("begin c2"); + primary.create_node(Some("c2")).expect("create c2"); + let t2 = primary + .commit_with_token() + .expect("commit c2") + .expect("token c2"); + + let pulled = replica + .replica_catch_up_once(1) + .expect("pull one before restart"); + assert_eq!(pulled, 1); + assert_eq!( + replica + .replica_replication_status() + .expect("status") + .applied_log_index, + t1.log_index + ); + + close_single_file(replica).expect("close replica"); + + let replica = open_replica(&replica_path, &primary_path).expect("reopen replica"); + let status = replica + .replica_replication_status() + .expect("status after reopen"); + assert_eq!(status.applied_log_index, t1.log_index); + + let pulled = replica.replica_catch_up_once(8).expect("pull after reopen"); + assert_eq!(pulled, 1); + assert_eq!( + replica + .replica_replication_status() + .expect("status final") + .applied_log_index, + t2.log_index + ); + assert_eq!(replica.count_nodes(), primary.count_nodes()); + + close_single_file(replica).expect("close replica final"); + close_single_file(primary).expect("close primary"); +} + +#[test] +fn wait_for_token_times_out_then_succeeds_after_catch_up() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("primary-wait.kitedb"); + let replica_path = dir.path().join("replica-wait.kitedb"); + + let primary = open_primary(&primary_path).expect("open primary"); + let _primary_sidecar = default_replication_sidecar_path(&primary_path); + + primary.begin(false).expect("begin base"); + primary.create_node(Some("base")).expect("create base"); + primary + 
.commit_with_token() + .expect("commit base") + .expect("token base"); + + let replica = open_replica(&replica_path, &primary_path).expect("open replica"); + replica + .replica_bootstrap_from_snapshot() + .expect("bootstrap snapshot"); + + primary.begin(false).expect("begin next"); + primary.create_node(Some("next")).expect("create next"); + let token = primary + .commit_with_token() + .expect("commit next") + .expect("token next"); + + let timed_out = replica.wait_for_token(token, 20).expect("wait timeout"); + assert!(!timed_out, "token should not be visible before catch-up"); + + replica.replica_catch_up_once(8).expect("catch up"); + + let reached = replica.wait_for_token(token, 1_000).expect("wait success"); + assert!(reached, "token should be visible after catch-up"); + + close_single_file(replica).expect("close replica"); + close_single_file(primary).expect("close primary"); +} + +#[test] +fn vector_property_mutations_replicate_and_delete() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("primary-vector-repl.kitedb"); + let replica_path = dir.path().join("replica-vector-repl.kitedb"); + + let primary = open_primary(&primary_path).expect("open primary"); + primary.begin(false).expect("begin base"); + let node = primary + .create_node(Some("vec-node")) + .expect("create vec node"); + let embedding_key = primary.define_propkey("embedding").expect("define propkey"); + primary + .set_node_vector(node, embedding_key, &[0.1, 0.2, 0.3]) + .expect("set vector"); + let token_set = primary + .commit_with_token() + .expect("commit set") + .expect("token set"); + + let replica = open_replica(&replica_path, &primary_path).expect("open replica"); + replica + .replica_bootstrap_from_snapshot() + .expect("bootstrap snapshot"); + let status = replica.replica_replication_status().expect("status"); + assert_eq!(status.applied_log_index, token_set.log_index); + let primary_vector = primary + .node_vector(node, embedding_key) + 
.expect("primary vector after commit"); + let replica_vector = replica + .node_vector(node, embedding_key) + .expect("replica vector after bootstrap"); + assert_eq!(replica_vector.len(), primary_vector.len()); + for (replica_value, primary_value) in replica_vector.iter().zip(primary_vector.iter()) { + assert!( + (replica_value - primary_value).abs() <= 1e-6, + "vector mismatch: replica={replica_value}, primary={primary_value}" + ); + } + + primary.begin(false).expect("begin delete"); + primary + .delete_node_vector(node, embedding_key) + .expect("delete vector"); + let token_delete = primary + .commit_with_token() + .expect("commit delete") + .expect("token delete"); + + let pulled = replica.replica_catch_up_once(8).expect("catch up delete"); + assert_eq!(pulled, 1); + let status = replica + .replica_replication_status() + .expect("status after delete"); + assert_eq!(status.applied_log_index, token_delete.log_index); + assert!( + replica.node_vector(node, embedding_key).is_none(), + "vector delete should replicate" + ); + + close_single_file(replica).expect("close replica"); + close_single_file(primary).expect("close primary"); +} diff --git a/ray-rs/tests/replication_phase_d.rs b/ray-rs/tests/replication_phase_d.rs new file mode 100644 index 0000000..731875f --- /dev/null +++ b/ray-rs/tests/replication_phase_d.rs @@ -0,0 +1,1270 @@ +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::sync::Arc; +use std::time::Duration; +use std::{env, process::Command}; + +use base64::engine::general_purpose::STANDARD as BASE64_STANDARD; +use base64::Engine; +use kitedb::core::single_file::{ + close_single_file, open_single_file, SingleFileOpenOptions, SyncMode, +}; +use kitedb::replication::types::ReplicationRole; + +fn open_primary( + path: &std::path::Path, + sidecar: &std::path::Path, + segment_max_bytes: u64, + retention_min_entries: u64, +) -> kitedb::Result { + open_primary_with_sync( + path, + sidecar, + segment_max_bytes, + retention_min_entries, + 
SyncMode::Full, + ) +} + +fn open_primary_with_sync( + path: &std::path::Path, + sidecar: &std::path::Path, + segment_max_bytes: u64, + retention_min_entries: u64, + sync_mode: SyncMode, +) -> kitedb::Result { + open_single_file( + path, + SingleFileOpenOptions::new() + .sync_mode(sync_mode) + .replication_role(ReplicationRole::Primary) + .replication_sidecar_path(sidecar) + .replication_segment_max_bytes(segment_max_bytes) + .replication_retention_min_entries(retention_min_entries), + ) +} + +fn open_replica( + replica_path: &std::path::Path, + source_db_path: &std::path::Path, + local_sidecar: &std::path::Path, + source_sidecar: &std::path::Path, +) -> kitedb::Result { + open_single_file( + replica_path, + SingleFileOpenOptions::new() + .replication_role(ReplicationRole::Replica) + .replication_sidecar_path(local_sidecar) + .replication_source_db_path(source_db_path) + .replication_source_sidecar_path(source_sidecar), + ) +} + +const PRIMARY_LOCK_CHILD_ENV: &str = "RAYDB_PRIMARY_LOCK_CHILD"; +const PRIMARY_LOCK_CHILD_DB_PATH_ENV: &str = "RAYDB_PRIMARY_LOCK_CHILD_DB_PATH"; +const PRIMARY_LOCK_CHILD_SIDECAR_PATH_ENV: &str = "RAYDB_PRIMARY_LOCK_CHILD_SIDECAR_PATH"; + +#[test] +fn primary_lock_probe_child_process_helper() { + if env::var_os(PRIMARY_LOCK_CHILD_ENV).is_none() { + return; + } + + let db_path = + std::path::PathBuf::from(env::var(PRIMARY_LOCK_CHILD_DB_PATH_ENV).expect("child db path env")); + let sidecar_path = std::path::PathBuf::from( + env::var(PRIMARY_LOCK_CHILD_SIDECAR_PATH_ENV).expect("child sidecar path env"), + ); + + let exit_code = match open_primary(&db_path, &sidecar_path, 256, 8) { + Ok(primary) => { + let _ = close_single_file(primary); + 1 + } + Err(_) => 0, + }; + std::process::exit(exit_code); +} + +#[test] +fn promotion_increments_epoch_and_fences_stale_primary_writes() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("phase-d-promote.kitedb"); + let sidecar = 
dir.path().join("phase-d-promote.sidecar"); + + let primary_a = open_primary(&db_path, &sidecar, 256, 4).expect("open primary a"); + let primary_b = open_primary(&db_path, &sidecar, 256, 4).expect("open primary b"); + + primary_a.begin(false).expect("begin a"); + primary_a.create_node(Some("a0")).expect("create a0"); + let t0 = primary_a + .commit_with_token() + .expect("commit a0") + .expect("token a0"); + assert_eq!(t0.epoch, 1); + + let new_epoch = primary_b.primary_promote_to_next_epoch().expect("promote"); + assert_eq!(new_epoch, 2); + + primary_b.begin(false).expect("begin b"); + primary_b.create_node(Some("b0")).expect("create b0"); + let t1 = primary_b + .commit_with_token() + .expect("commit b0") + .expect("token b0"); + assert_eq!(t1.epoch, 2); + + primary_a.begin(false).expect("begin stale"); + primary_a.create_node(Some("stale")).expect("create stale"); + let err = primary_a + .commit_with_token() + .expect_err("stale primary commit must fail"); + assert!( + err.to_string().contains("stale primary"), + "unexpected stale commit error: {err}" + ); + + close_single_file(primary_b).expect("close b"); + close_single_file(primary_a).expect("close a"); +} + +#[test] +fn promotion_fences_stale_primary_writes_in_normal_sync_mode() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("phase-d-promote-normal-sync.kitedb"); + let sidecar = dir.path().join("phase-d-promote-normal-sync.sidecar"); + + let primary_a = + open_primary_with_sync(&db_path, &sidecar, 256, 4, SyncMode::Normal).expect("open primary a"); + let primary_b = + open_primary_with_sync(&db_path, &sidecar, 256, 4, SyncMode::Normal).expect("open primary b"); + + primary_a.begin(false).expect("begin a"); + primary_a.create_node(Some("a0")).expect("create a0"); + let t0 = primary_a + .commit_with_token() + .expect("commit a0") + .expect("token a0"); + assert_eq!(t0.epoch, 1); + + let new_epoch = primary_b.primary_promote_to_next_epoch().expect("promote"); + 
assert_eq!(new_epoch, 2); + + primary_a.begin(false).expect("begin stale"); + primary_a.create_node(Some("stale")).expect("create stale"); + let err = primary_a + .commit_with_token() + .expect_err("stale primary commit must fail immediately in normal sync mode"); + assert!( + err.to_string().contains("stale primary"), + "unexpected stale commit error: {err}" + ); + + close_single_file(primary_b).expect("close b"); + close_single_file(primary_a).expect("close a"); +} + +#[test] +fn primary_open_rejects_sidecar_when_other_process_holds_primary_lock() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("phase-d-process-lock.kitedb"); + let sidecar = dir.path().join("phase-d-process-lock.sidecar"); + let primary = open_primary(&db_path, &sidecar, 256, 8).expect("open parent primary"); + + let status = Command::new(std::env::current_exe().expect("current test binary")) + .arg("--test-threads=1") + .arg("--exact") + .arg("primary_lock_probe_child_process_helper") + .arg("--nocapture") + .env(PRIMARY_LOCK_CHILD_ENV, "1") + .env(PRIMARY_LOCK_CHILD_DB_PATH_ENV, db_path.as_os_str()) + .env(PRIMARY_LOCK_CHILD_SIDECAR_PATH_ENV, sidecar.as_os_str()) + .status() + .expect("spawn child probe"); + + assert!( + status.success(), + "child process unexpectedly opened primary with same sidecar lock" + ); + close_single_file(primary).expect("close parent primary"); +} + +#[test] +fn retention_respects_active_replica_cursor_and_minimum_window() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("phase-d-retention.kitedb"); + let sidecar = dir.path().join("phase-d-retention.sidecar"); + + let primary = open_primary(&db_path, &sidecar, 1, 2).expect("open primary"); + + for i in 0..6 { + primary.begin(false).expect("begin"); + primary + .create_node(Some(&format!("n-{i}"))) + .expect("create"); + let _ = primary.commit_with_token().expect("commit").expect("token"); + } + + primary + 
.primary_report_replica_progress("replica-a", 1, 2) + .expect("report cursor"); + + let prune = primary.primary_run_retention().expect("run retention"); + assert!(prune.pruned_segments > 0); + + let status = primary.primary_replication_status().expect("status"); + assert_eq!(status.retained_floor, 3); + assert!(status + .replica_lags + .iter() + .any(|lag| lag.replica_id == "replica-a" && lag.applied_log_index == 2)); + + close_single_file(primary).expect("close primary"); +} + +#[test] +fn retention_uses_replica_progress_without_manual_report_calls() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("phase-d-auto-progress-primary.kitedb"); + let primary_sidecar = dir.path().join("phase-d-auto-progress-primary.sidecar"); + let replica_path = dir.path().join("phase-d-auto-progress-replica.kitedb"); + let replica_sidecar = dir.path().join("phase-d-auto-progress-replica.sidecar"); + + let primary = open_primary(&primary_path, &primary_sidecar, 1, 1).expect("open primary"); + primary.begin(false).expect("begin base"); + primary.create_node(Some("base")).expect("create base"); + primary + .commit_with_token() + .expect("commit base") + .expect("token base"); + + let replica = open_replica( + &replica_path, + &primary_path, + &replica_sidecar, + &primary_sidecar, + ) + .expect("open replica"); + replica + .replica_bootstrap_from_snapshot() + .expect("bootstrap snapshot"); + + for i in 0..3 { + primary.begin(false).expect("begin warmup"); + primary + .create_node(Some(&format!("warmup-{i}"))) + .expect("create warmup"); + primary.commit_with_token().expect("commit warmup"); + } + + let warmup_pulled = replica.replica_catch_up_once(64).expect("warmup catch-up"); + assert!(warmup_pulled > 0, "replica should apply warmup frames"); + + for i in 0..4 { + primary.begin(false).expect("begin backlog"); + primary + .create_node(Some(&format!("backlog-{i}"))) + .expect("create backlog"); + primary.commit_with_token().expect("commit backlog"); 
+ } + + let prune = primary.primary_run_retention().expect("run retention"); + assert!(prune.pruned_segments > 0, "test needs actual pruning"); + + let backlog_pulled = replica + .replica_catch_up_once(64) + .expect("replica should catch up without reseed after retention"); + assert!(backlog_pulled > 0, "replica should pull backlog frames"); + assert_eq!(replica.count_nodes(), primary.count_nodes()); + assert!( + !replica + .replica_replication_status() + .expect("replica status") + .needs_reseed, + "auto progress should prevent retention-induced reseed" + ); + + close_single_file(replica).expect("close replica"); + close_single_file(primary).expect("close primary"); +} + +#[test] +fn missing_segment_marks_replica_needs_reseed() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("phase-d-missing-primary.kitedb"); + let primary_sidecar = dir.path().join("phase-d-missing-primary.sidecar"); + let replica_path = dir.path().join("phase-d-missing-replica.kitedb"); + let replica_sidecar = dir.path().join("phase-d-missing-replica.sidecar"); + + let primary = open_primary(&primary_path, &primary_sidecar, 1, 2).expect("open primary"); + + primary.begin(false).expect("begin base"); + primary.create_node(Some("base")).expect("create base"); + primary + .commit_with_token() + .expect("commit base") + .expect("token base"); + + let replica = open_replica( + &replica_path, + &primary_path, + &replica_sidecar, + &primary_sidecar, + ) + .expect("open replica"); + replica + .replica_bootstrap_from_snapshot() + .expect("bootstrap snapshot"); + + for i in 0..4 { + primary.begin(false).expect("begin"); + primary + .create_node(Some(&format!("m-{i}"))) + .expect("create"); + primary.commit_with_token().expect("commit").expect("token"); + } + + let progress_path = primary_sidecar.join("replica-progress.json"); + if progress_path.exists() { + std::fs::remove_file(&progress_path).expect("remove persisted replica progress"); + } + let _ = 
primary.primary_run_retention().expect("run retention"); + + let err = replica + .replica_catch_up_once(32) + .expect_err("replica should require reseed"); + assert!(err.to_string().contains("reseed")); + + let status = replica + .replica_replication_status() + .expect("replica status"); + assert!(status.needs_reseed); + + close_single_file(replica).expect("close replica"); + close_single_file(primary).expect("close primary"); +} + +#[test] +fn lagging_replica_reseed_recovers_after_retention_gap() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("phase-d-reseed-primary.kitedb"); + let primary_sidecar = dir.path().join("phase-d-reseed-primary.sidecar"); + let replica_path = dir.path().join("phase-d-reseed-replica.kitedb"); + let replica_sidecar = dir.path().join("phase-d-reseed-replica.sidecar"); + + let primary = open_primary(&primary_path, &primary_sidecar, 1, 2).expect("open primary"); + + primary.begin(false).expect("begin base"); + primary.create_node(Some("base")).expect("create base"); + primary + .commit_with_token() + .expect("commit base") + .expect("token base"); + + let replica = open_replica( + &replica_path, + &primary_path, + &replica_sidecar, + &primary_sidecar, + ) + .expect("open replica"); + replica + .replica_bootstrap_from_snapshot() + .expect("bootstrap snapshot"); + + for i in 0..5 { + primary.begin(false).expect("begin"); + primary + .create_node(Some(&format!("r-{i}"))) + .expect("create"); + primary.commit_with_token().expect("commit").expect("token"); + } + + let progress_path = primary_sidecar.join("replica-progress.json"); + if progress_path.exists() { + std::fs::remove_file(&progress_path).expect("remove persisted replica progress"); + } + let _ = primary.primary_run_retention().expect("run retention"); + + let _ = replica + .replica_catch_up_once(32) + .expect_err("must need reseed"); + assert!( + replica + .replica_replication_status() + .expect("status") + .needs_reseed + ); + + 
replica.replica_reseed_from_snapshot().expect("reseed"); + assert!( + !replica + .replica_replication_status() + .expect("status post reseed") + .needs_reseed + ); + assert_eq!(replica.count_nodes(), primary.count_nodes()); + + close_single_file(replica).expect("close replica"); + close_single_file(primary).expect("close primary"); +} + +#[test] +fn lagging_replica_across_epoch_retention_gap_requires_reseed() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("phase-d-epoch-gap-primary.kitedb"); + let primary_sidecar = dir.path().join("phase-d-epoch-gap-primary.sidecar"); + let replica_path = dir.path().join("phase-d-epoch-gap-replica.kitedb"); + let replica_sidecar = dir.path().join("phase-d-epoch-gap-replica.sidecar"); + + let primary = open_primary(&primary_path, &primary_sidecar, 1, 8).expect("open primary"); + + primary.begin(false).expect("begin base"); + primary.create_node(Some("base")).expect("create base"); + primary + .commit_with_token() + .expect("commit base") + .expect("token base"); + + let replica = open_replica( + &replica_path, + &primary_path, + &replica_sidecar, + &primary_sidecar, + ) + .expect("open replica"); + replica + .replica_bootstrap_from_snapshot() + .expect("bootstrap snapshot"); + + for i in 0..24 { + primary.begin(false).expect("begin pre-promotion"); + primary + .create_node(Some(&format!("pre-{i}"))) + .expect("create pre"); + primary + .commit_with_token() + .expect("commit pre") + .expect("token pre"); + } + + let new_epoch = primary + .primary_promote_to_next_epoch() + .expect("promote to next epoch"); + assert_eq!(new_epoch, 2); + + for i in 0..24 { + primary.begin(false).expect("begin post-promotion"); + primary + .create_node(Some(&format!("post-{i}"))) + .expect("create post"); + primary + .commit_with_token() + .expect("commit post") + .expect("token post"); + } + + let prune = primary.primary_run_retention().expect("run retention"); + assert!( + prune.pruned_segments > 0, + "test 
setup requires actual segment pruning" + ); + + let target_head = primary + .primary_replication_status() + .expect("primary replication status") + .head_log_index; + + let mut needs_reseed = false; + for _ in 0..16 { + match replica.replica_catch_up_once(64) { + Ok(_) => { + let status = replica + .replica_replication_status() + .expect("replica status after catch-up"); + if status.needs_reseed { + needs_reseed = true; + break; + } + if status.applied_log_index >= target_head { + break; + } + } + Err(err) => { + let status = replica + .replica_replication_status() + .expect("replica status after catch-up error"); + if status.needs_reseed || err.to_string().contains("reseed") { + needs_reseed = true; + break; + } + panic!("unexpected catch-up error before reseed: {err}"); + } + } + } + + let status = replica + .replica_replication_status() + .expect("final replica status"); + assert!( + needs_reseed || status.needs_reseed, + "lagging replica across epoch retention gap must require reseed; status={status:?}, replica_nodes={}, primary_nodes={}", + replica.count_nodes(), + primary.count_nodes() + ); + + close_single_file(replica).expect("close replica"); + close_single_file(primary).expect("close primary"); +} + +#[test] +fn transient_missing_segments_do_not_immediately_require_reseed() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("phase-d-transient-gap-primary.kitedb"); + let primary_sidecar = dir.path().join("phase-d-transient-gap-primary.sidecar"); + let replica_path = dir.path().join("phase-d-transient-gap-replica.kitedb"); + let replica_sidecar = dir.path().join("phase-d-transient-gap-replica.sidecar"); + + let primary = + open_primary(&primary_path, &primary_sidecar, 1024 * 1024, 8).expect("open primary"); + primary.begin(false).expect("begin base"); + primary.create_node(Some("base")).expect("create base"); + primary + .commit_with_token() + .expect("commit base") + .expect("token base"); + + let replica = 
open_replica( + &replica_path, + &primary_path, + &replica_sidecar, + &primary_sidecar, + ) + .expect("open replica"); + replica + .replica_bootstrap_from_snapshot() + .expect("bootstrap snapshot"); + + primary.begin(false).expect("begin c1"); + primary.create_node(Some("c1")).expect("create c1"); + primary + .commit_with_token() + .expect("commit c1") + .expect("token c1"); + + let mut hidden_segments = Vec::new(); + for entry in std::fs::read_dir(&primary_sidecar).expect("read primary sidecar") { + let path = entry.expect("read sidecar entry").path(); + let is_segment = path + .file_name() + .and_then(|name| name.to_str()) + .is_some_and(|name| name.starts_with("segment-") && name.ends_with(".rlog")); + if !is_segment { + continue; + } + let hidden = path.with_extension("rlog.hidden"); + std::fs::rename(&path, &hidden).expect("temporarily hide segment"); + hidden_segments.push((path, hidden)); + } + assert!( + !hidden_segments.is_empty(), + "test setup failed: no segment files discovered" + ); + + let err = replica + .replica_catch_up_once(64) + .expect_err("transient segment unavailability must fail catch-up attempt"); + assert!( + !replica + .replica_replication_status() + .expect("replica status after transient segment miss") + .needs_reseed, + "transient segment unavailability must not force immediate reseed: {err}" + ); + + for (segment, hidden) in hidden_segments { + std::fs::rename(&hidden, &segment).expect("restore hidden segment"); + } + + let applied = replica + .replica_catch_up_once(64) + .expect("replica should recover after transient segment availability"); + assert!(applied > 0, "replica should apply pending frames"); + assert!( + !replica + .replica_replication_status() + .expect("replica status after recovery") + .needs_reseed, + "successful recovery should keep reseed flag cleared" + ); + + close_single_file(replica).expect("close replica"); + close_single_file(primary).expect("close primary"); +} + +#[test] +fn 
bootstrap_handles_concurrent_primary_writes_safely() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("phase-d-bootstrap-race-primary.kitedb"); + let primary_sidecar = dir.path().join("phase-d-bootstrap-race-primary.sidecar"); + let replica_path = dir.path().join("phase-d-bootstrap-race-replica.kitedb"); + let replica_sidecar = dir.path().join("phase-d-bootstrap-race-replica.sidecar"); + + let primary = + Arc::new(open_primary(&primary_path, &primary_sidecar, 1024 * 1024, 8).expect("open primary")); + + primary.begin(false).expect("begin seed"); + for i in 0..5_000 { + primary + .create_node(Some(&format!("seed-{i}"))) + .expect("create seed"); + } + primary.commit_with_token().expect("commit seed"); + + let replica = open_replica( + &replica_path, + &primary_path, + &replica_sidecar, + &primary_sidecar, + ) + .expect("open replica"); + + let stop = Arc::new(AtomicBool::new(false)); + let wrote = Arc::new(AtomicUsize::new(0)); + + let writer_primary = Arc::clone(&primary); + let writer_stop = Arc::clone(&stop); + let writer_wrote = Arc::clone(&wrote); + let writer = std::thread::spawn(move || { + let mut i = 0usize; + let mut local_commits = 0usize; + while !writer_stop.load(Ordering::Relaxed) { + if writer_primary.begin(false).is_ok() { + let _ = writer_primary.create_node(Some(&format!("race-{i}"))); + match writer_primary.commit_with_token() { + Ok(_) => { + writer_wrote.fetch_add(1, Ordering::Relaxed); + local_commits = local_commits.saturating_add(1); + if local_commits % 64 == 0 { + let _ = writer_primary.checkpoint(); + } + } + Err(_) => { + let _ = writer_primary.rollback(); + let _ = writer_primary.checkpoint(); + } + } + } + i = i.saturating_add(1); + } + }); + + let bootstrap = replica.replica_bootstrap_from_snapshot(); + stop.store(true, Ordering::Relaxed); + writer.join().expect("join writer"); + + let wrote_commits = wrote.load(Ordering::Relaxed); + assert!( + wrote_commits > 0, + "test setup failed: expected 
concurrent primary commits during bootstrap" + ); + + match bootstrap { + Ok(()) => { + primary + .checkpoint() + .expect("checkpoint primary after contention"); + let target_head = primary + .primary_replication_status() + .expect("primary status after contention") + .head_log_index; + let mut stalled = 0usize; + for _ in 0..128 { + let status = replica + .replica_replication_status() + .expect("replica status during contention catch-up"); + if status.applied_log_index >= target_head { + break; + } + let applied = replica + .replica_catch_up_once(256) + .expect("catch-up after bootstrap under contention"); + if applied == 0 { + stalled = stalled.saturating_add(1); + if stalled >= 20 { + break; + } + std::thread::sleep(Duration::from_millis(5)); + } else { + stalled = 0; + } + } + let final_status = replica + .replica_replication_status() + .expect("final replica status after contention catch-up"); + assert!( + final_status.applied_log_index >= target_head, + "replica did not catch up after bootstrap under contention: applied={}, target={}", + final_status.applied_log_index, + target_head + ); + } + Err(err) => { + let message = err.to_string(); + assert!( + message.contains("quiesce") + || message.contains("WAL buffer full") + || message.contains("WalBufferFull"), + "unexpected bootstrap error: {err}" + ); + } + } + + close_single_file(replica).expect("close replica"); + let primary = Arc::into_inner(primary).expect("primary unique"); + close_single_file(primary).expect("close primary"); +} + +#[test] +fn bootstrap_retries_until_source_quiesces() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("phase-d-bootstrap-retry-primary.kitedb"); + let primary_sidecar = dir.path().join("phase-d-bootstrap-retry-primary.sidecar"); + let replica_path = dir.path().join("phase-d-bootstrap-retry-replica.kitedb"); + let replica_sidecar = dir.path().join("phase-d-bootstrap-retry-replica.sidecar"); + + let primary = + 
Arc::new(open_primary(&primary_path, &primary_sidecar, 1024 * 1024, 8).expect("open primary")); + + primary.begin(false).expect("begin seed"); + for i in 0..5_000 { + primary + .create_node(Some(&format!("seed-{i}"))) + .expect("create seed"); + } + primary.commit_with_token().expect("commit seed"); + + let replica = open_replica( + &replica_path, + &primary_path, + &replica_sidecar, + &primary_sidecar, + ) + .expect("open replica"); + + let writer_primary = Arc::clone(&primary); + let writer = std::thread::spawn(move || { + for i in 0..80 { + if writer_primary.begin(false).is_ok() { + let _ = writer_primary.create_node(Some(&format!("retry-race-{i}"))); + let _ = writer_primary.commit_with_token(); + } + std::thread::sleep(Duration::from_millis(2)); + } + }); + + let bootstrap = replica.replica_bootstrap_from_snapshot(); + writer.join().expect("join writer"); + + bootstrap.expect( + "bootstrap should retry while writes are active and eventually succeed once source quiesces", + ); + primary + .checkpoint() + .expect("checkpoint primary after writer"); + let target_head = primary + .primary_replication_status() + .expect("primary status after writer") + .head_log_index; + + let mut stalled = 0usize; + for _ in 0..128 { + let status = replica + .replica_replication_status() + .expect("replica status during catch-up"); + if status.applied_log_index >= target_head { + break; + } + let applied = replica + .replica_catch_up_once(256) + .expect("catch-up after bootstrap"); + if applied == 0 { + stalled = stalled.saturating_add(1); + if stalled >= 20 { + break; + } + std::thread::sleep(Duration::from_millis(5)); + } else { + stalled = 0; + } + } + let final_status = replica + .replica_replication_status() + .expect("final replica status after catch-up"); + assert!( + final_status.applied_log_index >= target_head, + "replica did not catch up after bootstrap retry: applied={}, target={}", + final_status.applied_log_index, + target_head + ); + + 
close_single_file(replica).expect("close replica"); + let primary = Arc::into_inner(primary).expect("primary unique"); + close_single_file(primary).expect("close primary"); +} + +#[test] +fn promotion_race_rejects_split_brain_writes() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("phase-d-race.kitedb"); + let sidecar = dir.path().join("phase-d-race.sidecar"); + + let left = Arc::new(open_primary(&db_path, &sidecar, 128, 8).expect("open left")); + let right = Arc::new(open_primary(&db_path, &sidecar, 128, 8).expect("open right")); + + let l = Arc::clone(&left); + let h1 = std::thread::spawn(move || { + let promote = l.primary_promote_to_next_epoch(); + l.begin(false).expect("left begin"); + l.create_node(Some("left")).expect("left create"); + let commit = l.commit_with_token(); + (promote, commit) + }); + + let r = Arc::clone(&right); + let h2 = std::thread::spawn(move || { + let promote = r.primary_promote_to_next_epoch(); + r.begin(false).expect("right begin"); + r.create_node(Some("right")).expect("right create"); + let commit = r.commit_with_token(); + (promote, commit) + }); + + let (left_promote, left_result) = h1.join().expect("left join"); + let (right_promote, right_result) = h2.join().expect("right join"); + assert!(left_promote.is_ok()); + assert!(right_promote.is_ok()); + + let left_ok = left_result.as_ref().is_ok_and(|token| token.is_some()); + let right_ok = right_result.as_ref().is_ok_and(|token| token.is_some()); + assert!( + left_ok ^ right_ok, + "exactly one writer should succeed after race" + ); + + let left = Arc::into_inner(left).expect("left unique"); + let right = Arc::into_inner(right).expect("right unique"); + close_single_file(left).expect("close left"); + close_single_file(right).expect("close right"); +} + +#[test] +fn retention_time_window_keeps_recent_segments() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("phase-d-retention-window.kitedb"); + let sidecar = 
dir.path().join("phase-d-retention-window.sidecar"); + + let primary = open_single_file( + &db_path, + SingleFileOpenOptions::new() + .replication_role(ReplicationRole::Primary) + .replication_sidecar_path(&sidecar) + .replication_segment_max_bytes(1) + .replication_retention_min_entries(0) + .replication_retention_min_ms(60_000), + ) + .expect("open primary"); + + for i in 0..6 { + primary.begin(false).expect("begin"); + primary + .create_node(Some(&format!("w-{i}"))) + .expect("create"); + primary.commit_with_token().expect("commit").expect("token"); + } + + let segments_before = std::fs::read_dir(&sidecar) + .expect("list sidecar") + .filter_map(|entry| entry.ok()) + .filter(|entry| entry.file_name().to_string_lossy().starts_with("segment-")) + .count(); + assert!( + segments_before > 1, + "expected multiple segments for retention" + ); + + let prune = primary.primary_run_retention().expect("run retention"); + assert_eq!(prune.pruned_segments, 0); + + // Ensure no filesystem-timestamp race with segment creation. 
+ std::thread::sleep(Duration::from_millis(5)); + + let segments_after = std::fs::read_dir(&sidecar) + .expect("list sidecar after retention") + .filter_map(|entry| entry.ok()) + .filter(|entry| entry.file_name().to_string_lossy().starts_with("segment-")) + .count(); + assert_eq!(segments_after, segments_before); + + close_single_file(primary).expect("close primary"); +} + +#[test] +fn replica_open_requires_source_db_path() { + let dir = tempfile::tempdir().expect("tempdir"); + let replica_path = dir.path().join("phase-d-misconfig-no-source.kitedb"); + let replica_sidecar = dir.path().join("phase-d-misconfig-no-source.sidecar"); + + let err = open_single_file( + &replica_path, + SingleFileOpenOptions::new() + .replication_role(ReplicationRole::Replica) + .replication_sidecar_path(&replica_sidecar), + ) + .err() + .expect("replica open without source db path must fail"); + + assert!( + err.to_string().contains("source db path"), + "unexpected error: {err}" + ); +} + +#[test] +fn replica_open_rejects_source_sidecar_equal_local_sidecar() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("phase-d-misconfig-primary.kitedb"); + let primary_sidecar = dir.path().join("phase-d-misconfig-primary.sidecar"); + let replica_path = dir.path().join("phase-d-misconfig-replica.kitedb"); + + let primary = open_primary(&primary_path, &primary_sidecar, 128, 8).expect("open primary"); + primary.begin(false).expect("begin primary"); + primary.create_node(Some("seed")).expect("create seed"); + primary.commit_with_token().expect("commit primary"); + + let err = open_single_file( + &replica_path, + SingleFileOpenOptions::new() + .replication_role(ReplicationRole::Replica) + .replication_sidecar_path(&primary_sidecar) + .replication_source_db_path(&primary_path) + .replication_source_sidecar_path(&primary_sidecar), + ) + .err() + .expect("replica local/source sidecar collision must fail"); + + assert!( + err.to_string().contains("source sidecar path 
must differ"), + "unexpected error: {err}" + ); + + close_single_file(primary).expect("close primary"); +} + +#[test] +fn primary_snapshot_transport_export_includes_metadata_and_optional_data() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("phase-d-transport-snapshot.kitedb"); + let sidecar = dir.path().join("phase-d-transport-snapshot.sidecar"); + let primary = open_primary(&db_path, &sidecar, 128, 8).expect("open primary"); + + primary.begin(false).expect("begin"); + primary.create_node(Some("snap-1")).expect("create"); + primary.commit_with_token().expect("commit"); + + let without_data = primary + .primary_export_snapshot_transport_json(false) + .expect("snapshot transport export"); + let without_data_json: serde_json::Value = + serde_json::from_str(&without_data).expect("parse snapshot export"); + assert_eq!(without_data_json["format"], "single-file-db-copy"); + assert_eq!(without_data_json["epoch"], 1); + assert_eq!(without_data_json["data_base64"], serde_json::Value::Null); + assert!(without_data_json["checksum_crc32c"] + .as_str() + .map(|value| !value.is_empty()) + .unwrap_or(false)); + + let with_data = primary + .primary_export_snapshot_transport_json(true) + .expect("snapshot export with data"); + let with_data_json: serde_json::Value = + serde_json::from_str(&with_data).expect("parse snapshot export with data"); + let encoded = with_data_json["data_base64"] + .as_str() + .expect("data_base64 must be present"); + let decoded = BASE64_STANDARD + .decode(encoded) + .expect("decode snapshot base64"); + assert_eq!( + decoded.len() as u64, + with_data_json["byte_length"] + .as_u64() + .expect("byte_length must be u64") + ); + + close_single_file(primary).expect("close primary"); +} + +#[test] +fn primary_log_transport_export_pages_by_cursor() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("phase-d-transport-log.kitedb"); + let sidecar = 
dir.path().join("phase-d-transport-log.sidecar"); + let primary = open_primary(&db_path, &sidecar, 1, 2).expect("open primary"); + + for i in 0..5 { + primary.begin(false).expect("begin"); + primary + .create_node(Some(&format!("transport-{i}"))) + .expect("create"); + primary.commit_with_token().expect("commit"); + } + + let first = primary + .primary_export_log_transport_json(None, 2, 1024 * 1024, true) + .expect("first log export"); + let first_json: serde_json::Value = serde_json::from_str(&first).expect("parse first page"); + assert_eq!(first_json["frame_count"], 2); + assert_eq!(first_json["eof"], false); + assert!(first_json["frames"] + .as_array() + .expect("frames array") + .iter() + .all(|frame| frame["payload_base64"].as_str().is_some())); + + let cursor = first_json["next_cursor"] + .as_str() + .expect("next_cursor") + .to_string(); + let second = primary + .primary_export_log_transport_json(Some(&cursor), 4, 1024 * 1024, false) + .expect("second log export"); + let second_json: serde_json::Value = serde_json::from_str(&second).expect("parse second page"); + assert!(second_json["frame_count"].as_u64().unwrap_or_default() > 0); + assert!(second_json["frames"] + .as_array() + .expect("frames array") + .iter() + .all(|frame| frame["payload_base64"].is_null())); + + close_single_file(primary).expect("close primary"); +} + +#[test] +fn primary_log_transport_export_rejects_crc_corruption() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("phase-d-transport-crc-corrupt.kitedb"); + let sidecar = dir.path().join("phase-d-transport-crc-corrupt.sidecar"); + let primary = open_primary(&db_path, &sidecar, 128, 8).expect("open primary"); + + primary.begin(false).expect("begin"); + primary.create_node(Some("crc-corrupt")).expect("create"); + primary.commit_with_token().expect("commit"); + + let mut segments: Vec<_> = std::fs::read_dir(&sidecar) + .expect("read sidecar") + .filter_map(|entry| entry.ok()) + .map(|entry| entry.path()) 
+ .filter(|path| { + path + .file_name() + .and_then(|name| name.to_str()) + .is_some_and(|name| name.starts_with("segment-") && name.ends_with(".rlog")) + }) + .collect(); + segments.sort(); + let segment_path = segments.first().expect("segment path"); + + let mut bytes = std::fs::read(segment_path).expect("read segment"); + assert!( + bytes.len() > 32, + "test setup failed: expected segment with payload bytes" + ); + bytes[32] ^= 0xFF; + std::fs::write(segment_path, &bytes).expect("write corrupted segment"); + + let err = primary + .primary_export_log_transport_json(None, 128, 1024 * 1024, true) + .expect_err("transport export should fail on corrupted frame crc"); + assert!( + err.to_string().contains("CrcMismatch") || err.to_string().to_lowercase().contains("crc"), + "unexpected transport corruption error: {err}" + ); + + close_single_file(primary).expect("close primary"); +} + +#[test] +fn primary_reopen_does_not_reuse_log_indexes_when_manifest_lags_disk() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("phase-d-manifest-lag.kitedb"); + let sidecar = dir.path().join("phase-d-manifest-lag.sidecar"); + + let primary_first = open_single_file( + &db_path, + SingleFileOpenOptions::new() + .replication_role(ReplicationRole::Primary) + .replication_sidecar_path(&sidecar) + .replication_segment_max_bytes(1024 * 1024) + .replication_retention_min_entries(8) + .sync_mode(SyncMode::Normal), + ) + .expect("open primary"); + primary_first.begin(false).expect("begin first"); + primary_first + .create_node(Some("first")) + .expect("create first"); + primary_first + .commit_with_token() + .expect("commit first") + .expect("token first"); + close_single_file(primary_first).expect("close first primary"); + + let primary_second = open_single_file( + &db_path, + SingleFileOpenOptions::new() + .replication_role(ReplicationRole::Primary) + .replication_sidecar_path(&sidecar) + .replication_segment_max_bytes(1024 * 1024) + 
.replication_retention_min_entries(8) + .sync_mode(SyncMode::Normal), + ) + .expect("reopen primary"); + primary_second.begin(false).expect("begin second"); + primary_second + .create_node(Some("second")) + .expect("create second"); + primary_second + .commit_with_token() + .expect("commit second") + .expect("token second"); + + let exported = primary_second + .primary_export_log_transport_json(None, 16, 1024 * 1024, false) + .expect("export log transport"); + let exported_json: serde_json::Value = serde_json::from_str(&exported).expect("parse json"); + let frames = exported_json["frames"].as_array().expect("frames array"); + assert!( + frames.len() >= 2, + "expected at least two frames after reopen test" + ); + let first_idx = frames[0]["log_index"].as_u64().expect("first log index"); + let second_idx = frames[1]["log_index"].as_u64().expect("second log index"); + assert!( + second_idx > first_idx, + "log indexes must remain strictly increasing across reopen: first={first_idx} second={second_idx}" + ); + + close_single_file(primary_second).expect("close second primary"); +} + +#[test] +fn primary_snapshot_transport_rejects_oversized_inline_payloads() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir + .path() + .join("phase-d-transport-snapshot-too-large.kitedb"); + let sidecar = dir + .path() + .join("phase-d-transport-snapshot-too-large.sidecar"); + let primary = open_primary(&db_path, &sidecar, 128, 8).expect("open primary"); + + let oversized = 33 * 1024 * 1024u64; + let db_file = std::fs::OpenOptions::new() + .write(true) + .open(&db_path) + .expect("open db file for resize"); + db_file.set_len(oversized).expect("set db file length"); + + let err = primary + .primary_export_snapshot_transport_json(true) + .expect_err("oversized inline snapshot export must fail"); + assert!( + err.to_string().to_lowercase().contains("snapshot") + && err.to_string().to_lowercase().contains("size"), + "unexpected oversized snapshot error: {err}" + ); + + 
close_single_file(primary).expect("close primary"); +} + +#[test] +fn primary_log_transport_enforces_byte_budget_even_for_first_frame() { + let dir = tempfile::tempdir().expect("tempdir"); + let db_path = dir.path().join("phase-d-transport-log-budget.kitedb"); + let sidecar = dir.path().join("phase-d-transport-log-budget.sidecar"); + let primary = open_primary(&db_path, &sidecar, 1024 * 1024, 8).expect("open primary"); + + primary.begin(false).expect("begin"); + for i in 0..300 { + primary + .create_node(Some(&format!("budget-{i:03}-{}", "x".repeat(40)))) + .expect("create"); + } + primary.commit_with_token().expect("commit"); + + let err = primary + .primary_export_log_transport_json(None, 16, 1024, true) + .expect_err("oversized frame should not bypass max_bytes budget"); + assert!( + err.to_string().contains("max_bytes"), + "unexpected max-bytes error: {err}" + ); + + close_single_file(primary).expect("close primary"); +} + +#[test] +fn replica_catch_up_retries_transient_source_manifest_errors() { + let dir = tempfile::tempdir().expect("tempdir"); + let primary_path = dir.path().join("phase-d-retry-primary.kitedb"); + let primary_sidecar = dir.path().join("phase-d-retry-primary.sidecar"); + let replica_path = dir.path().join("phase-d-retry-replica.kitedb"); + let replica_sidecar = dir.path().join("phase-d-retry-replica.sidecar"); + + let primary = open_primary(&primary_path, &primary_sidecar, 128, 8).expect("open primary"); + primary.begin(false).expect("begin seed"); + primary.create_node(Some("seed")).expect("create seed"); + primary + .commit_with_token() + .expect("commit seed") + .expect("seed token"); + + let replica = open_replica( + &replica_path, + &primary_path, + &replica_sidecar, + &primary_sidecar, + ) + .expect("open replica"); + replica + .replica_bootstrap_from_snapshot() + .expect("bootstrap snapshot"); + + primary.begin(false).expect("begin backlog"); + primary + .create_node(Some("backlog")) + .expect("create backlog"); + 
primary.commit_with_token().expect("commit backlog"); + + let manifest_path = primary_sidecar.join("manifest.json"); + let manifest_tmp_path = primary_sidecar.join("manifest.json.tmp.retry"); + std::fs::rename(&manifest_path, &manifest_tmp_path).expect("hide manifest"); + + let restore = std::thread::spawn({ + let manifest_path = manifest_path.clone(); + let manifest_tmp_path = manifest_tmp_path.clone(); + move || { + std::thread::sleep(Duration::from_millis(40)); + std::fs::rename(&manifest_tmp_path, &manifest_path).expect("restore manifest"); + } + }); + + let catch_up = replica.replica_catch_up_once(64); + restore.join().expect("join restore thread"); + let applied = catch_up.expect("replica catch-up should retry transient manifest read failures"); + assert!(applied > 0, "retry path should apply backlog frames"); + + close_single_file(replica).expect("close replica"); + close_single_file(primary).expect("close primary"); +} diff --git a/ray-rs/ts/index.ts b/ray-rs/ts/index.ts index c855178..b92c86b 100644 --- a/ray-rs/ts/index.ts +++ b/ray-rs/ts/index.ts @@ -66,6 +66,7 @@ export type { InferNode, InferEdgeProps, } from './schema' +export type { RuntimeProfile } from '../index' // ============================================================================= // Native Bindings @@ -126,6 +127,7 @@ type NodeObject = NodeRef & Record type NodeIdLike = number | { id: number } type NodePropsSelection = Array type SyncMode = JsSyncMode +type ReplicationRole = 'disabled' | 'primary' | 'replica' type InsertExecutorSingle = Omit & { returning(): InferNode } @@ -1026,11 +1028,25 @@ export { // Re-export utility functions export { openDatabase, + recommendedSafeProfile, + recommendedBalancedProfile, + recommendedReopenHeavyProfile, createBackup, restoreBackup, backupInfo, createOfflineBackup, collectMetrics, + collectReplicationLogTransportJson, + collectReplicationMetricsOtelJson, + collectReplicationMetricsOtelProtobuf, + collectReplicationMetricsPrometheus, + 
collectReplicationSnapshotTransportJson, + pushReplicationMetricsOtelJson, + pushReplicationMetricsOtelJsonWithOptions, + pushReplicationMetricsOtelGrpc, + pushReplicationMetricsOtelGrpcWithOptions, + pushReplicationMetricsOtelProtobuf, + pushReplicationMetricsOtelProtobufWithOptions, healthCheck, createVectorIndex, bruteForceSearch, @@ -1039,6 +1055,34 @@ export { version, } from '../index' +export { + authorizeReplicationAdminRequest, + createForwardedTlsMtlsMatcher, + createReplicationAdminAuthorizer, + createNodeTlsMtlsMatcher, + createReplicationTransportAdapter, + isForwardedTlsClientAuthorized, + isReplicationAdminAuthorized, + isNodeTlsClientAuthorized, + readReplicationLogTransport, + readReplicationSnapshotTransport, +} from './replication_transport' + +export type { + ReplicationAdminAuthConfig, + ReplicationAdminAuthMode, + ReplicationAdminAuthRequest, + ReplicationForwardedMtlsMatcherOptions, + ReplicationNodeMtlsMatcherOptions, + ReplicationNodeTlsLikeRequest, + ReplicationNodeTlsLikeSocket, + ReplicationLogTransportFrame, + ReplicationLogTransportOptions, + ReplicationLogTransportPage, + ReplicationSnapshotTransport, + ReplicationTransportAdapter, +} from './replication_transport' + // Re-export common types with clean names export type { // Database @@ -1072,6 +1116,8 @@ export type { MvccStats, HealthCheckResult, HealthCheckEntry, + OtlpHttpExportResult, + PushReplicationMetricsOtelOptions, // Traversal JsTraverseOptions as TraverseOptions, JsTraversalStep as TraversalStep, @@ -1119,6 +1165,14 @@ export interface KiteOptions { readOnly?: boolean /** Create database if it doesn't exist (default: true) */ createIfMissing?: boolean + /** Enable MVCC (snapshot isolation + conflict detection) */ + mvcc?: boolean + /** MVCC GC interval in ms */ + mvccGcIntervalMs?: number + /** MVCC retention in ms */ + mvccRetentionMs?: number + /** MVCC max version chain depth */ + mvccMaxChainDepth?: number /** Sync mode for durability (default: "Full") */ syncMode?: 
SyncMode /** Enable group commit (coalesce WAL flushes across commits) */ @@ -1129,6 +1183,20 @@ export interface KiteOptions { walSizeMb?: number /** WAL usage threshold (0.0-1.0) to trigger auto-checkpoint */ checkpointThreshold?: number + /** Replication role */ + replicationRole?: ReplicationRole + /** Replication sidecar path override */ + replicationSidecarPath?: string + /** Source primary db path (replica role only) */ + replicationSourceDbPath?: string + /** Source primary sidecar path override (replica role only) */ + replicationSourceSidecarPath?: string + /** Segment rotation threshold in bytes (primary role only) */ + replicationSegmentMaxBytes?: number + /** Minimum retained entries window (primary role only) */ + replicationRetentionMinEntries?: number + /** Minimum retained segment age in milliseconds (primary role only) */ + replicationRetentionMinMs?: number } // ============================================================================= @@ -1176,18 +1244,58 @@ function edgeSpecToNative(spec: EdgeSpec): JsEdgeSpec { } } +function replicationRoleToNative(role: ReplicationRole): 'Disabled' | 'Primary' | 'Replica' { + switch (role) { + case 'disabled': + return 'Disabled' + case 'primary': + return 'Primary' + case 'replica': + return 'Replica' + } +} + function optionsToNative(options: KiteOptions): JsKiteOptions { - return { + const nativeOptions: JsKiteOptions = { nodes: options.nodes.map(nodeSpecToNative), edges: options.edges.map(edgeSpecToNative), readOnly: options.readOnly, createIfMissing: options.createIfMissing, + mvcc: options.mvcc, + mvccGcIntervalMs: options.mvccGcIntervalMs, + mvccRetentionMs: options.mvccRetentionMs, + mvccMaxChainDepth: options.mvccMaxChainDepth, syncMode: options.syncMode, groupCommitEnabled: options.groupCommitEnabled, groupCommitWindowMs: options.groupCommitWindowMs, walSizeMb: options.walSizeMb, checkpointThreshold: options.checkpointThreshold, } + + const mutable = nativeOptions as unknown as Record + if 
(options.replicationRole) { + mutable.replicationRole = replicationRoleToNative(options.replicationRole) + } + if (options.replicationSidecarPath) { + mutable.replicationSidecarPath = options.replicationSidecarPath + } + if (options.replicationSourceDbPath) { + mutable.replicationSourceDbPath = options.replicationSourceDbPath + } + if (options.replicationSourceSidecarPath) { + mutable.replicationSourceSidecarPath = options.replicationSourceSidecarPath + } + if (options.replicationSegmentMaxBytes !== undefined) { + mutable.replicationSegmentMaxBytes = options.replicationSegmentMaxBytes + } + if (options.replicationRetentionMinEntries !== undefined) { + mutable.replicationRetentionMinEntries = options.replicationRetentionMinEntries + } + if (options.replicationRetentionMinMs !== undefined) { + mutable.replicationRetentionMinMs = options.replicationRetentionMinMs + } + + return nativeOptions } // ============================================================================= diff --git a/ray-rs/ts/replication_transport.ts b/ray-rs/ts/replication_transport.ts new file mode 100644 index 0000000..f9a820a --- /dev/null +++ b/ray-rs/ts/replication_transport.ts @@ -0,0 +1,362 @@ +import { + collectReplicationLogTransportJson, + collectReplicationMetricsOtelJson, + collectReplicationMetricsPrometheus, + collectReplicationSnapshotTransportJson, +} from '../index' +import type { Database } from '../index' + +export interface ReplicationSnapshotTransport { + format: string + db_path: string + byte_length: number + checksum_crc32c: string + generated_at_ms: number + epoch: number + head_log_index: number + retained_floor: number + start_cursor: string + data_base64?: string | null +} + +export interface ReplicationLogTransportFrame { + epoch: number + log_index: number + segment_id: number + segment_offset: number + bytes: number + payload_base64?: string | null +} + +export interface ReplicationLogTransportPage { + epoch: number + head_log_index: number + retained_floor: number + 
cursor?: string | null
+  next_cursor?: string | null
+  eof: boolean
+  frame_count: number
+  total_bytes: number
+  frames: ReplicationLogTransportFrame[]
+}
+
+export interface ReplicationLogTransportOptions {
+  cursor?: string | null
+  maxFrames?: number
+  maxBytes?: number
+  includePayload?: boolean
+}
+
+export interface ReplicationTransportAdapter {
+  snapshot(includeData?: boolean): ReplicationSnapshotTransport
+  log(options?: ReplicationLogTransportOptions): ReplicationLogTransportPage
+  metricsPrometheus(): string
+  metricsOtelJson(): string
+}
+
+export type ReplicationAdminAuthMode = 'none' | 'token' | 'mtls' | 'token_or_mtls' | 'token_and_mtls'
+
+export interface ReplicationAdminAuthRequest {
+  headers?: Record<string, string | string[] | undefined> | null
+}
+
+export interface ReplicationAdminAuthConfig<
+  TRequest extends ReplicationAdminAuthRequest = ReplicationAdminAuthRequest,
+> {
+  mode?: ReplicationAdminAuthMode
+  token?: string | null
+  mtlsHeader?: string
+  mtlsSubjectRegex?: RegExp | null
+  mtlsMatcher?: (request: TRequest) => boolean
+}
+
+export interface ReplicationNodeTlsLikeSocket {
+  authorized?: boolean | null
+  getPeerCertificate?: () => unknown
+}
+
+export interface ReplicationNodeTlsLikeRequest extends ReplicationAdminAuthRequest {
+  socket?: ReplicationNodeTlsLikeSocket | null
+  client?: ReplicationNodeTlsLikeSocket | null
+  raw?: { socket?: ReplicationNodeTlsLikeSocket | null } | null
+  req?: { socket?: ReplicationNodeTlsLikeSocket | null } | null
+}
+
+export interface ReplicationNodeMtlsMatcherOptions {
+  requirePeerCertificate?: boolean
+}
+
+export interface ReplicationForwardedMtlsMatcherOptions {
+  requirePeerCertificate?: boolean
+  requireVerifyHeader?: boolean
+  verifyHeaders?: string[]
+  certHeaders?: string[]
+  successValues?: string[]
+}
+
+const REPLICATION_ADMIN_AUTH_MODES = new Set([
+  'none',
+  'token',
+  'mtls',
+  'token_or_mtls',
+  'token_and_mtls',
+])
+
+const DEFAULT_FORWARDED_VERIFY_HEADERS = ['x-client-verify', 'ssl-client-verify']
+const 
DEFAULT_FORWARDED_CERT_HEADERS = ['x-forwarded-client-cert', 'x-client-cert']
+const DEFAULT_FORWARDED_SUCCESS_VALUES = ['success', 'successful', 'true', '1', 'yes', 'verified', 'ok']
+
+function hasPeerCertificate(socket: ReplicationNodeTlsLikeSocket): boolean {
+  if (!socket.getPeerCertificate) return false
+  try {
+    const certificate = socket.getPeerCertificate()
+    if (!certificate || typeof certificate !== 'object') return false
+    return Object.keys(certificate as Record<string, unknown>).length > 0
+  } catch {
+    return false
+  }
+}
+
+function isSocketAuthorized(
+  socket: ReplicationNodeTlsLikeSocket | null | undefined,
+  options: Required<ReplicationNodeMtlsMatcherOptions>,
+): boolean {
+  if (!socket || socket.authorized !== true) return false
+  if (!options.requirePeerCertificate) return true
+  return hasPeerCertificate(socket)
+}
+
+export function isNodeTlsClientAuthorized(
+  request: ReplicationNodeTlsLikeRequest,
+  options: ReplicationNodeMtlsMatcherOptions = {},
+): boolean {
+  const resolved: Required<ReplicationNodeMtlsMatcherOptions> = {
+    requirePeerCertificate: options.requirePeerCertificate ?? false,
+  }
+  return (
+    isSocketAuthorized(request.socket, resolved) ||
+    isSocketAuthorized(request.client, resolved) ||
+    isSocketAuthorized(request.raw?.socket, resolved) ||
+    isSocketAuthorized(request.req?.socket, resolved)
+  )
+}
+
+export function createNodeTlsMtlsMatcher(
+  options: ReplicationNodeMtlsMatcherOptions = {},
+): (request: ReplicationNodeTlsLikeRequest) => boolean {
+  return (request: ReplicationNodeTlsLikeRequest): boolean => isNodeTlsClientAuthorized(request, options)
+}
+
+function normalizeHeaderNames(headers: string[] | undefined, fallback: string[]): string[] {
+  const names = (headers ?? fallback)
+    .map((name) => name.trim().toLowerCase())
+    .filter((name) => name.length > 0)
+  if (names.length > 0) return names
+  return fallback
+}
+
+function normalizeHeaderValues(values: string[] | undefined, fallback: string[]): Set<string> {
+  const normalized = (values ?? 
fallback) + .map((value) => value.trim().toLowerCase()) + .filter((value) => value.length > 0) + if (normalized.length > 0) return new Set(normalized) + return new Set(fallback) +} + +export function isForwardedTlsClientAuthorized( + request: ReplicationAdminAuthRequest, + options: ReplicationForwardedMtlsMatcherOptions = {}, +): boolean { + const verifyHeaders = normalizeHeaderNames(options.verifyHeaders, DEFAULT_FORWARDED_VERIFY_HEADERS) + const certHeaders = normalizeHeaderNames(options.certHeaders, DEFAULT_FORWARDED_CERT_HEADERS) + const successValues = normalizeHeaderValues(options.successValues, DEFAULT_FORWARDED_SUCCESS_VALUES) + const requireVerifyHeader = options.requireVerifyHeader ?? true + const requirePeerCertificate = options.requirePeerCertificate ?? false + + const verifyValues: string[] = [] + for (const header of verifyHeaders) { + const value = getHeaderValue(request, header) + if (value) verifyValues.push(value.toLowerCase()) + } + const verifyOk = verifyValues.length > 0 + ? 
verifyValues.some((value) => successValues.has(value))
+    : !requireVerifyHeader
+  if (!verifyOk) return false
+
+  if (!requirePeerCertificate) return true
+  for (const header of certHeaders) {
+    if (getHeaderValue(request, header)) return true
+  }
+  return false
+}
+
+export function createForwardedTlsMtlsMatcher(
+  options: ReplicationForwardedMtlsMatcherOptions = {},
+): (request: ReplicationAdminAuthRequest) => boolean {
+  return (request: ReplicationAdminAuthRequest): boolean =>
+    isForwardedTlsClientAuthorized(request, options)
+}
+
+type NormalizedReplicationAdminAuthConfig<
+  TRequest extends ReplicationAdminAuthRequest = ReplicationAdminAuthRequest,
+> = {
+  mode: ReplicationAdminAuthMode
+  token: string | null
+  mtlsHeader: string
+  mtlsSubjectRegex: RegExp | null
+  mtlsMatcher: ((request: TRequest) => boolean) | null
+}
+
+function normalizeReplicationAdminAuthConfig<
+  TRequest extends ReplicationAdminAuthRequest = ReplicationAdminAuthRequest,
+>(config: ReplicationAdminAuthConfig<TRequest>): NormalizedReplicationAdminAuthConfig<TRequest> {
+  const modeRaw = config.mode ?? 'none'
+  if (!REPLICATION_ADMIN_AUTH_MODES.has(modeRaw)) {
+    throw new Error(
+      `Invalid replication admin auth mode '${String(modeRaw)}'; expected none|token|mtls|token_or_mtls|token_and_mtls`,
+    )
+  }
+  const token = config.token?.trim() || null
+  if ((modeRaw === 'token' || modeRaw === 'token_or_mtls' || modeRaw === 'token_and_mtls') && !token) {
+    throw new Error(`replication admin auth mode '${modeRaw}' requires a non-empty token`)
+  }
+  const mtlsHeaderRaw = config.mtlsHeader?.trim().toLowerCase()
+  const mtlsHeader = mtlsHeaderRaw && mtlsHeaderRaw.length > 0 ? mtlsHeaderRaw : 'x-forwarded-client-cert'
+  return {
+    mode: modeRaw,
+    token,
+    mtlsHeader,
+    mtlsSubjectRegex: config.mtlsSubjectRegex ?? null,
+    mtlsMatcher: config.mtlsMatcher ?? 
null,
+  }
+}
+
+function getHeaderValue(request: ReplicationAdminAuthRequest, name: string): string | null {
+  const headers = request.headers
+  if (!headers) return null
+  const direct = headers[name]
+  if (typeof direct === 'string' && direct.trim().length > 0) {
+    return direct.trim()
+  }
+  for (const [key, value] of Object.entries(headers)) {
+    if (key.toLowerCase() !== name) continue
+    if (typeof value !== 'string') continue
+    const trimmed = value.trim()
+    if (trimmed.length > 0) return trimmed
+  }
+  return null
+}
+
+function isTokenMatch(request: ReplicationAdminAuthRequest, token: string | null): boolean {
+  if (!token) return false
+  const authorization = getHeaderValue(request, 'authorization')
+  if (!authorization) return false
+  return authorization === `Bearer ${token}`
+}
+
+function isMtlsMatch<TRequest extends ReplicationAdminAuthRequest>(
+  request: TRequest,
+  config: NormalizedReplicationAdminAuthConfig<TRequest>,
+): boolean {
+  if (config.mtlsMatcher) {
+    return config.mtlsMatcher(request)
+  }
+  const certValue = getHeaderValue(request, config.mtlsHeader)
+  if (!certValue) return false
+  if (!config.mtlsSubjectRegex) return true
+  return config.mtlsSubjectRegex.test(certValue)
+}
+
+function isAuthorizedWithNormalized<TRequest extends ReplicationAdminAuthRequest>(
+  request: TRequest,
+  config: NormalizedReplicationAdminAuthConfig<TRequest>,
+): boolean {
+  const tokenOk = isTokenMatch(request, config.token)
+  const mtlsOk = isMtlsMatch(request, config)
+
+  switch (config.mode) {
+    case 'none':
+      return true
+    case 'token':
+      return tokenOk
+    case 'mtls':
+      return mtlsOk
+    case 'token_or_mtls':
+      return tokenOk || mtlsOk
+    case 'token_and_mtls':
+      return tokenOk && mtlsOk
+  }
+}
+
+export function isReplicationAdminAuthorized<
+  TRequest extends ReplicationAdminAuthRequest = ReplicationAdminAuthRequest,
+>(request: TRequest, config: ReplicationAdminAuthConfig<TRequest>): boolean {
+  const normalized = normalizeReplicationAdminAuthConfig(config)
+  return isAuthorizedWithNormalized(request, normalized)
+}
+
+export function authorizeReplicationAdminRequest<
+  
TRequest extends ReplicationAdminAuthRequest = ReplicationAdminAuthRequest,
+>(request: TRequest, config: ReplicationAdminAuthConfig<TRequest>): void {
+  const normalized = normalizeReplicationAdminAuthConfig(config)
+  if (isAuthorizedWithNormalized(request, normalized)) {
+    return
+  }
+  throw new Error(`Unauthorized: replication admin auth mode '${normalized.mode}' not satisfied`)
+}
+
+export function createReplicationAdminAuthorizer<
+  TRequest extends ReplicationAdminAuthRequest = ReplicationAdminAuthRequest,
+>(config: ReplicationAdminAuthConfig<TRequest>): (request: TRequest) => void {
+  const normalized = normalizeReplicationAdminAuthConfig(config)
+  return (request: TRequest): void => {
+    if (isAuthorizedWithNormalized(request, normalized)) {
+      return
+    }
+    throw new Error(`Unauthorized: replication admin auth mode '${normalized.mode}' not satisfied`)
+  }
+}
+
+function parseJson<T>(raw: string, label: string): T {
+  try {
+    return JSON.parse(raw) as T
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error)
+    throw new Error(`Failed to parse ${label}: ${message}`)
+  }
+}
+
+export function readReplicationSnapshotTransport(db: Database, includeData = false): ReplicationSnapshotTransport {
+  const raw = collectReplicationSnapshotTransportJson(db, includeData)
+  return parseJson(raw, 'replication snapshot transport JSON')
+}
+
+export function readReplicationLogTransport(
+  db: Database,
+  options: ReplicationLogTransportOptions = {},
+): ReplicationLogTransportPage {
+  const raw = collectReplicationLogTransportJson(
+    db,
+    options.cursor ?? null,
+    options.maxFrames ?? 128,
+    options.maxBytes ?? 1024 * 1024,
+    options.includePayload ?? 
true, + ) + return parseJson(raw, 'replication log transport JSON') +} + +export function createReplicationTransportAdapter(db: Database): ReplicationTransportAdapter { + return { + snapshot(includeData = false): ReplicationSnapshotTransport { + return readReplicationSnapshotTransport(db, includeData) + }, + log(options: ReplicationLogTransportOptions = {}): ReplicationLogTransportPage { + return readReplicationLogTransport(db, options) + }, + metricsPrometheus(): string { + return collectReplicationMetricsPrometheus(db) + }, + metricsOtelJson(): string { + return collectReplicationMetricsOtelJson(db) + }, + } +}