diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0c55d28..2ed7f58 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,7 +1,6 @@ name: ci on: - push: pull_request: jobs: @@ -46,7 +45,7 @@ jobs: with: version: 0.15.2 - name: Run coverage gate - run: make test-coverage + run: make coverage - name: Upload to Codecov uses: codecov/codecov-action@v5 with: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c96e137..d1a8405 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -2,23 +2,18 @@ name: Release on: push: - branches: - - main + tags: + - "v[0-9]+.[0-9]+" + - "v[0-9]+.[0-9]+.[0-9]+" + +concurrency: + group: release-tag + cancel-in-progress: false permissions: contents: read jobs: - # ── Verify tests pass on release tag ───────────────────────────────────── - test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - uses: mlugg/setup-zig@v2 - with: - version: 0.15.2 - - run: make test-unit - # ── Verify VERSION matches git tag ──────────────────────────────────────── verify-version: runs-on: ubuntu-latest @@ -31,16 +26,98 @@ jobs: id: meta run: | VERSION="$(tr -d '[:space:]' < VERSION)" + TAG_VERSION="${GITHUB_REF#refs/tags/v}" echo "$VERSION" | grep -Eq '^[0-9]+\.[0-9]+(\.[0-9]+)?$' || { echo "✗ VERSION must be version format x.y or x.y.z, got: $VERSION"; exit 1; } - TAG="v$VERSION" + [ "$TAG_VERSION" = "$VERSION" ] || { echo "✗ VERSION ($VERSION) does not match tag ($TAG_VERSION)"; exit 1; } + TAG="v$TAG_VERSION" echo "version=$VERSION" >> "$GITHUB_OUTPUT" echo "tag=$TAG" >> "$GITHUB_OUTPUT" - echo "✓ release version resolved: $VERSION ($TAG)" + echo "✓ release version resolved: $VERSION ($TAG), tag matches VERSION" + + lint: + runs-on: ubuntu-latest + needs: verify-version + steps: + - uses: actions/checkout@v6 + - uses: mlugg/setup-zig@v2 + with: + version: 0.15.2 + - run: make lint + + test: + runs-on: ubuntu-latest + needs: lint + steps: + - 
uses: actions/checkout@v6 + - uses: mlugg/setup-zig@v2 + with: + version: 0.15.2 + - run: make test-unit + + cross-compile: + runs-on: ubuntu-latest + needs: test + strategy: + matrix: + target: + - x86_64-linux + - aarch64-linux + - x86_64-macos + - aarch64-macos + steps: + - uses: actions/checkout@v6 + - uses: mlugg/setup-zig@v2 + with: + version: 0.15.2 + - name: Build ${{ matrix.target }} + run: zig build -Dtarget=${{ matrix.target }} -Doptimize=ReleaseSafe + - name: Verify no external C deps + run: | + result=$(zig build test-bin -Dtarget=${{ matrix.target }} --summary all 2>&1 | grep -c "link with" || true) + if [ "$result" -eq 0 ]; then + echo "PASS: pure Zig" + else + echo "WARN: C deps detected" + exit 1 + fi + + coverage: + runs-on: ubuntu-latest + needs: cross-compile + container: + image: debian:trixie-slim + options: --security-opt seccomp=unconfined + steps: + - name: Install kcov and toolchain deps + run: | + apt-get update + apt-get install -y --no-install-recommends \ + kcov git curl xz-utils ca-certificates make gpg + - uses: actions/checkout@v6 + - uses: mlugg/setup-zig@v2 + with: + version: 0.15.2 + - name: Run coverage gate + run: make coverage + - name: Upload to Codecov + uses: codecov/codecov-action@v5 + with: + files: coverage/cobertura.xml + flags: posthog-zig + fail_ci_if_error: true + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + - name: Upload coverage artifact + uses: actions/upload-artifact@v7 + with: + name: coverage + path: coverage/ + retention-days: 7 # ── GitHub Release with CHANGELOG excerpt ──────────────────────────────── create-release: runs-on: ubuntu-latest - needs: [test, verify-version] + needs: [coverage, verify-version] permissions: contents: write steps: diff --git a/Makefile b/Makefile index 7f54160..a5eb77e 100644 --- a/Makefile +++ b/Makefile @@ -8,8 +8,8 @@ COVERAGE_MIN_LINES ?= 60 .DEFAULT_GOAL := help -.PHONY: help lint fmt fmt-check test test-unit test-integration test-depth \ - test-bin test-coverage 
bench memleak clean +.PHONY: help lint fmt fmt-check test test-unit test-integration \ + test-bin coverage bench memleak clean help: ## Show available targets @echo "posthog-zig" @@ -18,8 +18,7 @@ help: ## Show available targets @echo " fmt Auto-format all Zig source" @echo " test Run unit tests" @echo " test-integration Run integration tests (requires POSTHOG_API_KEY)" - @echo " test-depth Enforce minimum test count gate" - @echo " test-coverage Run kcov coverage + enforce minimum threshold" + @echo " coverage Run kcov coverage + enforce minimum threshold" @echo " bench Run capture() hot-path benchmark" @echo " memleak Run allocator leak gate" @echo " clean Remove build artifacts" @@ -40,7 +39,7 @@ lint: fmt-check ## Check formatting # ── Tests ──────────────────────────────────────────────────────────────────── -test: test-unit test-depth ## Run unit tests + depth gate +test: test-unit ## Run unit tests test-unit: ## Run unit tests @echo "→ Running unit tests..." @@ -59,14 +58,6 @@ test-integration: ## Run integration tests against live PostHog (requires POSTH zig build test -Dintegration=true --summary all @echo "✓ integration tests passed" -test-depth: ## Enforce minimum test count gate - @mkdir -p .tmp - @unit_count=$$(grep -rn '^test "' src -l --include='*.zig' | xargs grep -h '^test "' | wc -l | tr -d ' '); \ - integration_count=$$(grep -rn '^test "integration:' tests --include='*.zig' 2>/dev/null | wc -l | tr -d ' '); \ - printf 'unit_tests=%s\nintegration_tests=%s\n' "$$unit_count" "$$integration_count" | tee .tmp/test-depth.txt >/dev/null; \ - if [ "$$unit_count" -lt 15 ]; then echo "✗ expected >= 15 unit tests, got $$unit_count"; exit 1; fi; \ - echo "✓ test depth gate passed (unit=$$unit_count integration=$$integration_count)" - # ── Coverage ───────────────────────────────────────────────────────────────── test-bin: ## Build test binary for kcov @@ -75,7 +66,7 @@ test-bin: ## Build test binary for kcov ZIG_LOCAL_CACHE_DIR="$(ZIG_LOCAL_CACHE_DIR)" \ zig 
build test-bin -test-coverage: ## Run kcov coverage + enforce minimum threshold +coverage: ## Run kcov coverage + enforce minimum threshold @command -v kcov >/dev/null 2>&1 || { echo "✗ kcov required (brew install kcov / apt-get install kcov)"; exit 1; } @mkdir -p "$(ZIG_GLOBAL_CACHE_DIR)" "$(ZIG_LOCAL_CACHE_DIR)" coverage .tmp @echo "→ Building test binary..." diff --git a/README.md b/README.md index bd81c23..dcb39e1 100644 --- a/README.md +++ b/README.md @@ -2,19 +2,18 @@ [![ci](https://github.com/usezombie/posthog-zig/actions/workflows/ci.yml/badge.svg)](https://github.com/usezombie/posthog-zig/actions/workflows/ci.yml) [![codecov](https://codecov.io/gh/usezombie/posthog-zig/branch/main/graph/badge.svg)](https://codecov.io/gh/usezombie/posthog-zig) -[![version](https://img.shields.io/github/v/release/usezombie/posthog-zig?label=version)](https://github.com/usezombie/posthog-zig/releases) +[![version](https://img.shields.io/github/v/tag/usezombie/posthog-zig?label=version&sort=semver)](https://github.com/usezombie/posthog-zig/tags) [![zig](https://img.shields.io/badge/zig-0.15.x-orange)](https://ziglang.org) [![license](https://img.shields.io/badge/license-MIT-green)](LICENSE) A server-side PostHog analytics client for Zig. Non-blocking event capture with background batch delivery, retry, and graceful shutdown. -**Version:** 0.1.0 **Zig:** 0.15.x **PostHog API:** `/batch/` (capture) + `/decide/` v3 (feature flags) --- -## What's in v0.1 +## What is here | Feature | API | Notes | |---|---|---| @@ -38,29 +37,21 @@ A server-side PostHog analytics client for Zig. Non-blocking event capture with | Zig panic (unhandled) | Queue lost — no delivery | | OOM during flush | Retry up to `max_retries`, then drop | -**v0.1 is best-effort.** For handled application errors — a caught `error.NotFound`, -a failed DB query — the queue path is fine; the process is healthy and the flush -thread is alive. 
For true crashes (allocator corruption, unhandled panic) queued -events may not be delivered. +Delivery is best-effort for crash scenarios. For handled application errors +(for example, a caught `error.NotFound` or a failed DB query), the process is +healthy and the queue/flush path remains reliable. -**v0.2 will add crash-safe delivery:** `captureException` with `level == .fatal` +**Upcoming release will add crash-safe delivery:** `captureException` with `level == .fatal` will write a crash file to disk synchronously (no allocator, one `write()` syscall), delivered on next startup. See [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) for the full design. -## What's not in v0.1 - -- Crash file / disk-backed fatal delivery (v0.2) — see delivery guarantees above -- Super properties / global property middleware (v0.2) -- WASM / embedded targets — requires OS threads and `std.http.Client` -- Session recording, autocapture — browser-only, use posthog-js - --- ## Install ```bash -zig fetch --save https://github.com/usezombie/posthog-zig/archive/refs/tags/v0.1.0.tar.gz +zig fetch --save https://github.com/usezombie/posthog-zig/archive/refs/tags/<version-tag>.tar.gz ``` `build.zig`: @@ -143,9 +134,7 @@ defer if (payload) |p| allocator.free(p); // caller owns the returned slice try client.flush(); ``` ---- - -## Integration pattern for calling systems +### Integration patterns for calling systems posthog-zig is a library. It cannot install a panic handler. The calling application owns that responsibility. @@ -177,11 +166,11 @@ pub fn main() !void { // Zig calls this on unhandled panics. // Keep it minimal — the allocator may be corrupted. pub fn panic(msg: []const u8, trace: ?*std.builtin.StackTrace, ret_addr: ?usize) noreturn { - // v0.1: best-effort. If the flush thread is still alive it may deliver + // Current behavior: best-effort. If the flush thread is still alive it may deliver // events already in the queue. 
Do not attempt to enqueue new events here — // the allocator state is unknown. - // v0.2: ph_client.writeCrashFile() will be safe here (zero allocation, + // Upcoming release: ph_client.writeCrashFile() will be safe here (zero allocation, // single write() syscall of the arena buffer). Not implemented yet. std.debug.defaultPanic(msg, trace, ret_addr); @@ -221,48 +210,7 @@ if (ctx.posthog) |*ph| { The `catch {}` is intentional: analytics must never propagate errors to the caller. ---- - -## Design - -### Non-blocking hot path - -`capture()`, `identify()`, `group()`, and `captureException()` serialize the event -into the write-side arena and return. No per-event heap allocation, no I/O, no blocking. -The flush thread swaps arenas (O(1) under mutex) and owns all network activity. - -``` -capture() ──► write arena ──► swap (O(1)) ──► flush thread ──► POST /batch/ - < 1μs (mutex lock) (flush fires) owns flush arena (std.http.Client) - enqueue POST all events │ - return new capture() arena.reset() └─ retry on 5xx/429 - writes to (one free, O(1)) └─ drop after max_retries - other arena -``` - -**Memory model — double-buffer arena.** Two `ArenaAllocator` instances alternate roles. -After each flush, `arena.reset()` reclaims all event memory in one operation regardless -of batch size. Fixed memory footprint: `2 × max_queue_size × avg_event_size` (~4MB at -defaults). No per-event malloc/free, no heap fragmentation. - -**Overflow.** When the write-side arena is at capacity, new events are dropped (drop-newest) -and counted. The next flush cycle resets the arena and restores full capacity. - -### Retry policy - -- Retries on: 5xx, 429, network errors -- Does not retry on: 4xx (except 429) — bad data, logged and dropped -- Backoff: `min(1s * 2^attempt, 30s)` + random jitter 0–500ms - -### Feature flag cache - -`/decide/` responses are cached per `distinct_id` with a 60s TTL and an LRU cap of 1000 entries. 
After the first call, `isFeatureEnabled()` returns from cache without hitting the network. - -### Shutdown - -`client.deinit()` signals the flush thread to stop accepting new events, performs a final `POST /batch/` of the remaining queue, and joins the thread (unbounded wait in v0.1 — see `shutdown_flush_timeout_ms` in Configuration). Events that cannot be delivered are logged and dropped — they are not persisted to disk. - -For deeper design rationale — memory model, crash delivery tradeoffs, v0.2 double-buffer arena target, and serialization approach — see [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md). +For deeper design rationale — memory model, crash delivery tradeoffs, and serialization approach — see [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md). --- @@ -277,7 +225,7 @@ For deeper design rationale — memory model, crash delivery tradeoffs, v0.2 dou | `flush_at` | `20` | Flush when this many events are queued | | `max_queue_size` | `1000` | Queue capacity; drops newest on overflow | | `max_retries` | `3` | Max delivery attempts per batch | -| `shutdown_flush_timeout_ms` | `5_000` | Reserved for v0.2 — `deinit()` blocks until the flush thread joins (unbounded in v0.1) | +| `shutdown_flush_timeout_ms` | `5_000` | Reserved for timed join support in a future release; currently `deinit()` blocks until the flush thread joins | | `feature_flag_ttl_ms` | `60_000` | Feature flag cache TTL per distinct_id | --- @@ -301,7 +249,7 @@ zig build -Dtarget=x86_64-linux --summary all 2>&1 | grep "link with" && echo "W zig build bench # Coverage report (requires kcov: brew install kcov / apt-get install kcov) -make test-coverage +make coverage # Memory leak gate (valgrind on Linux, leaks on macOS) make memleak @@ -309,12 +257,6 @@ make memleak --- -## Disclaimer - -This is a **server-side SDK**. It assumes `std.http.Client`, `std.Thread`, and a real OS. It does not support browser WASM, session recording, or autocapture. 
For frontend analytics, use [posthog-js](https://github.com/PostHog/posthog-js). - ---- - ## License MIT — see [LICENSE](LICENSE). diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 79dae6f..164060b 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -64,89 +64,6 @@ flowchart TD end ``` -
ASCII version (for terminals / non-Mermaid renderers) - -``` -┌──────────────────────────────────────────────────────────────────────────────┐ -│ zombied (usezombie control plane) │ -│ │ -│ ┌─────────────────────┐ ┌──────────────────────┐ ┌──────────────────┐ │ -│ │ capture("run_start") │ │ captureException() │ │ identify() │ │ -│ │ capture("run_end") │ │ $exception_type │ │ group() │ │ -│ │ capture("deploy") │ │ $exception_message │ │ │ │ -│ │ + custom properties │ │ $exception_level │ │ $set traits │ │ -│ │ │ │ stack_trace (opt) │ │ $group_type/key │ │ -│ └──────────┬───────────┘ └──────────┬───────────┘ └────────┬─────────┘ │ -│ │ │ │ │ -│ └─────────────────┬───────┘───────────────────────┘ │ -│ ▼ │ -│ serialize to JSON (< 1μs) │ -│ + distinct_id, $lib, $lib_version, ISO 8601 timestamp │ -└──────────────────────────────┬─────────────────────────────────────────────┘ - │ - ▼ -┌──────────────────────────────────────────────────────────────────────────────┐ -│ posthog-zig SDK │ -│ │ -│ ┌─────────────────────────── Queue (batch.zig) ───────────────────────┐ │ -│ │ │ │ -│ │ enqueue() — mutex lock, arena dupe, append to event index, unlock │ │ -│ │ If count >= flush_at (default 20): signal condition variable │ │ -│ │ If count >= max_queue_size (default 1000): drop event (newest) │ │ -│ │ │ │ -│ │ Arena A (write side) Arena B (flush side) │ │ -│ │ [ev1_json][ev2_json][ev3]... 
[being POSTed] │ │ -│ │ │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ -│ │ │ -│ ▼ │ -│ ┌───────────────────── FlushThread (flush.zig) ──────────────────────┐ │ -│ │ │ │ -│ │ loop: │ │ -│ │ wait for signal OR flush_interval_ms timeout (default 10s) │ │ -│ │ drain() — swap write↔flush sides (O(1) index flip under mutex) │ │ -│ │ if no events: continue │ │ -│ │ POST /batch/ with all events from flush side │ │ -│ │ on 2xx: on_deliver(.delivered) → resetSide() (arena reclaim) │ │ -│ │ on 429/5xx: retry up to max_retries (default 3) │ │ -│ │ exponential backoff: min(1s × 2^attempt, 30s) + jitter │ │ -│ │ on 4xx: on_deliver(.failed) → drop batch, no retry │ │ -│ │ retries exhausted: on_deliver(.dropped) → drop batch │ │ -│ │ resetSide() — one arena reset, all memory reclaimed │ │ -│ │ │ │ -│ │ on shutdown: │ │ -│ │ final drain() + POST (best-effort, no retry on manual flush) │ │ -│ │ thread.join() │ │ -│ │ │ │ -│ └────────────────────────────┬───────────────────────────────────────┘ │ -│ │ │ -└───────────────────────────────┼────────────────────────────────────────────┘ - │ - ▼ HTTP POST -┌──────────────────────────────────────────────────────────────────────────────┐ -│ PostHog (https://us.i.posthog.com/batch/) │ -│ │ -│ { "api_key": "phc_...", "batch": [ {event}, {event}, ... ] } │ -│ │ -│ Events appear in: │ -│ • Events → custom events (capture) │ -│ • Error Tracking → $exception events (captureException) │ -│ • Persons → $identify events (identify) │ -│ • Groups → $groupidentify events (group) │ -└──────────────────────────────────────────────────────────────────────────────┘ -``` - -
- -### What gets pushed - -| Method | PostHog event | Use case in zombied | -|---|---|---| -| `capture()` | Custom event name (e.g. `run_started`, `deploy_completed`) | Business analytics, usage tracking | -| `captureException()` | `$exception` with `$exception_type`, `$exception_message`, `$exception_level`, optional stack trace | Error tracking — caught errors, OOM, workspace failures | -| `identify()` | `$identify` with `$set` properties | Associate traits (email, plan) with a distinct_id | -| `group()` | `$groupidentify` with `$group_type`, `$group_key`, `$group_set` | Associate users with workspaces/orgs | - ### Key timing defaults | Parameter | Default | Purpose | @@ -155,25 +72,12 @@ flowchart TD | `flush_at` | 20 events | Threshold to wake flush thread early | | `max_queue_size` | 1,000 events | Per-side capacity; overflow drops newest | | `max_retries` | 3 | Retry count for 429/5xx responses | -| `shutdown_flush_timeout_ms` | 5,000 (5s) | Reserved for v0.2 timed join | +| `shutdown_flush_timeout_ms` | 5,000 (5s) | Reserved for timed join in a future release | | `feature_flag_ttl_ms` | 60,000 (60s) | Cache TTL for feature flag decisions | --- -## Why this SDK exists - -posthog-zig is the server-side analytics layer for the usezombie stack. The first -consumer is `zombied` — a Zig control plane daemon where latency and reliability -matter. The SDK had to be: - -- **Non-blocking on the hot path.** `capture()` cannot touch the network. -- **Pure Zig.** No C FFI, no libuv, no linking complexity. -- **Error-tracking first.** `captureException()` with PostHog `$exception` format - is the must-have for a backend service, not an afterthought. - ---- - -## Memory model: double-buffer arena (v0.1) +## Memory model: double-buffer arena ### The insight @@ -216,7 +120,7 @@ no lock contention beyond the initial enqueue. 
### Properties -| | Per-event heap (rejected) | Double-buffer arena (v0.1) | +| | Per-event heap (rejected) | Double-buffer arena (current) | |---|---|---| | Alloc cost | N mallocs per flush | 1 reset per flush | | Free cost | N frees | 1 reset (O(1)) | @@ -237,9 +141,9 @@ total fixed = 4MB ## Crash delivery -### v0.1: best-effort +### Current behavior: best-effort -Delivery guarantee in v0.1: +Current delivery guarantees: | Shutdown path | Outcome | |---|---| @@ -248,17 +152,16 @@ Delivery guarantee in v0.1: | Zig panic (unhandled) | Queue lost — no delivery | | OOM during flush | Retry up to max_retries, then drop | -**This is honest and documented.** For handled application errors (`captureException` -on a caught error) the queue path is fine — the process is healthy. For true crashes -the queue cannot help. +For handled application errors (`captureException` on a caught error) the queue path +is healthy. For true crashes the queue cannot guarantee delivery. -### v0.2: crash file (Ghostty pattern) +### Upcoming: crash file (Ghostty pattern) Ghostty's crash reporter writes a `.ghosttycrash` envelope to disk in the transport callback — no network call, no allocator, one `write()` syscall on an already-open file descriptor. The file is uploaded on next startup. -For posthog-zig v0.2, `captureException` with `level == .fatal` will: +In an upcoming release, `captureException` with `level == .fatal` will: 1. Serialize the event into a stack buffer (no allocator) 2. 
Write synchronously to `$RUNTIME_DIR/posthog-crash-{uuid}.jsonl` @@ -280,7 +183,7 @@ A disk write requires: - A file descriptor (already open, or openable with `O_CREAT | O_APPEND`) - One `write()` syscall -The double-buffer arena design of v0.2 makes the disk write trivially cheap: +The double-buffer arena design for the upcoming release makes the disk write trivially cheap: the write-side arena is one contiguous slice — a single `write()` of `arena_a.buffer[0..arena_a.end_index]` captures all pending events without any additional allocation. @@ -295,11 +198,11 @@ registers one in its root source file: const posthog_client = &global_state.posthog; // app-owned pointer pub fn panic(msg: []const u8, trace: ?*std.builtin.StackTrace, ret_addr: ?usize) noreturn { - // v0.1: best-effort — flush thread may or may not still be alive + // Current behavior: best-effort — flush thread may or may not still be alive // Call deinit only if you are confident the allocator is healthy. // On OOM or UB-triggered panics, skip this. - // v0.2: write crash file — safe because it requires no allocator + // Upcoming: write crash file — safe because it requires no allocator // posthog_client.writeCrashFile() catch {}; std.debug.defaultPanic(msg, trace, ret_addr); @@ -307,7 +210,7 @@ pub fn panic(msg: []const u8, trace: ?*std.builtin.StackTrace, ret_addr: ?usize) ``` Keep the panic handler **minimal**. If the panic was caused by allocator corruption, -complex work inside the handler will itself crash. The crash file write in v0.2 is +complex work inside the handler will itself crash. The crash file write in the upcoming release is designed to require zero allocations for this reason. --- @@ -400,25 +303,6 @@ pub fn flush(self: *PostHogClient) !void { This is intentional: `flush()` is designed for use in shutdown sequences where the process is about to exit. A retry loop inside `flush()` would block `deinit()` beyond the caller's expectation. 
If retry-on-manual-flush is needed, call `flush()` in a loop with your own backoff logic, or rely on the background thread. -**`shutdown_flush_timeout_ms` is not yet enforced.** `FlushThread.stop()` calls `thread.join()` which is unbounded in v0.1. The timeout parameter is accepted for API stability and will enforce a timed join in v0.2. +**`shutdown_flush_timeout_ms` is not yet enforced.** `FlushThread.stop()` calls `thread.join()` which is currently unbounded. The timeout parameter is accepted for API stability and will enforce a timed join in an upcoming release. --- - -## What we deliberately excluded - -**Redis Streams bus pattern.** Considered: events → Redis Stream → posthog-node -consumer. Rejected: posthog-zig IS the fire-and-forget buffer layer. A bus adds -an operational dependency (Redis must be available) without removing the network -I/O problem — you still need something to read from the stream and call PostHog. -The background flush thread is simpler and removes the operational dependency. - -**JSONL audit log.** Considered: write every event to a local JSONL file as a -delivery receipt. Rejected for v0.1: adds I/O on every enqueue, complicates -cleanup, and the `on_deliver` callback covers the observability need. The v0.2 -crash file is a targeted version of this idea, scoped to fatal events only. - -**Per-event retry queue on disk.** Rejected for v0.1: scope creep. The in-memory -retry (up to `max_retries` with exponential backoff) is sufficient for transient -network failures. Persistent retry requires a compaction strategy, read-back -logic, and a format specification. Worth revisiting if PostHog ingestion proves -unreliable in production.