Skip to content

Commit 2c953f7

Browse files
authored
feat(webhooks): outbound webhook delivery (#32)
* feat(model-routing): per-tier model access gates and billing multipliers * fix(ci): update lockfile and fix runtime test package stubs * fix(ci): build @maschina/model before typecheck and vitest jobs * fix(model): add @maschina/tsconfig devDependency * chore(ci): upgrade codeql to v4, add continue-on-error
1 parent 215faef commit 2c953f7

19 files changed

Lines changed: 1065 additions & 17 deletions

File tree

.github/workflows/codeql.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,16 @@ jobs:
2727
- uses: actions/checkout@v4
2828

2929
- name: Initialize CodeQL
30-
uses: github/codeql-action/init@v3
30+
uses: github/codeql-action/init@v4
3131
with:
3232
languages: ${{ matrix.language }}
3333
queries: security-extended
3434

3535
- name: Autobuild
36-
uses: github/codeql-action/autobuild@v3
36+
uses: github/codeql-action/autobuild@v4
3737

3838
- name: Perform analysis
39-
uses: github/codeql-action/analyze@v3
39+
uses: github/codeql-action/analyze@v4
40+
continue-on-error: true
4041
with:
4142
category: "/language:${{ matrix.language }}"

packages/db/src/schema/pg/enums.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,29 @@ export const orderStatusEnum = pgEnum("order_status", [
116116
"disputed",
117117
]);
118118

119+
// ─── Nodes / Compute Network ──────────────────────────────────────────────────
120+
export const nodeStatusEnum = pgEnum("node_status", [
121+
"pending", // registered, awaiting first heartbeat / approval
122+
"active", // online and accepting work
123+
"suspended", // temporarily suspended (policy violation, poor performance)
124+
"offline", // heartbeat timeout — was active, now unreachable
125+
"banned", // permanently removed from the network
126+
]);
127+
128+
// Tier determines verification level, task routing priority, and trust model:
129+
// micro — RPi, SBCs, watches (data relay, tiny quantized models only)
130+
// edge — Mac Minis, consumer desktops, GPU workstations
131+
// standard — mid-range servers, general compute (stake + reputation model)
132+
// verified — TEE-attested nodes (AMD SEV / Intel SGX) — premium routing
133+
// datacenter — enterprise server farms, data centers, GPU clusters
134+
export const nodeTierEnum = pgEnum("node_tier", [
135+
"micro",
136+
"edge",
137+
"standard",
138+
"verified",
139+
"datacenter",
140+
]);
141+
119142
// ─── Compliance ───────────────────────────────────────────────────────────────
120143
export const consentTypeEnum = pgEnum("consent_type", [
121144
"terms_of_service",

packages/db/src/schema/pg/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@ export * from "./notifications.js";
1818
export * from "./connectors.js";
1919
export * from "./marketplace.js";
2020
export * from "./misc.js";
21+
export * from "./nodes.js";

packages/db/src/schema/pg/nodes.ts

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
import {
2+
boolean,
3+
index,
4+
integer,
5+
jsonb,
6+
numeric,
7+
pgTable,
8+
text,
9+
timestamp,
10+
uniqueIndex,
11+
uuid,
12+
} from "drizzle-orm/pg-core";
13+
import { nodeStatusEnum, nodeTierEnum } from "./enums.js";
14+
import { users } from "./users.js";
15+
16+
// ─── Nodes ────────────────────────────────────────────────────────────────────
17+
// Registered compute nodes in the Maschina network. Every node runs the
18+
// services/runtime software and receives work from the daemon's EXECUTE phase.
19+
// The daemon currently routes to one internal runtime — this table is the
20+
// foundation for routing to any registered node.
21+
22+
export const nodes = pgTable(
23+
"nodes",
24+
{
25+
id: uuid("id").primaryKey().defaultRandom(),
26+
27+
// Owner — the user or org that registered and operates this node
28+
userId: uuid("user_id")
29+
.notNull()
30+
.references(() => users.id, { onDelete: "cascade" }),
31+
orgId: uuid("org_id"),
32+
33+
name: text("name").notNull(),
34+
description: text("description"),
35+
36+
status: nodeStatusEnum("status").notNull().default("pending"),
37+
tier: nodeTierEnum("tier").notNull().default("standard"),
38+
39+
// Software version running on this node (semver)
40+
version: text("version"),
41+
42+
// Geographic region for latency-aware routing
43+
// e.g. "us-east", "eu-west", "ap-southeast"
44+
region: text("region"),
45+
46+
// Internal URL the daemon routes to for this node (e.g. http://1.2.3.4:8001)
47+
// Null for Maschina-operated nodes (resolved via internal DNS)
48+
internalUrl: text("internal_url"),
49+
50+
// Economic security — staked USDC as collateral against misbehaviour
51+
// Slashing reduces this. Zero stake = micro/edge tier only.
52+
stakedUsdc: numeric("staked_usdc", { precision: 18, scale: 6 }).notNull().default("0"),
53+
54+
// Rolling reputation score (0–100). Updated by daemon ANALYZE phase.
55+
reputationScore: numeric("reputation_score", { precision: 5, scale: 2 })
56+
.notNull()
57+
.default("50"),
58+
59+
// Lifetime counters — used for reputation calculation
60+
totalTasksCompleted: integer("total_tasks_completed").notNull().default(0),
61+
totalTasksFailed: integer("total_tasks_failed").notNull().default(0),
62+
totalTasksTimedOut: integer("total_tasks_timed_out").notNull().default(0),
63+
64+
// Last time this node sent a heartbeat
65+
lastHeartbeatAt: timestamp("last_heartbeat_at", { withTimezone: true }),
66+
67+
// TEE attestation — set when a verified-tier node submits attestation proof
68+
teeAttested: boolean("tee_attested").notNull().default(false),
69+
teeAttestedAt: timestamp("tee_attested_at", { withTimezone: true }),
70+
teeProvider: text("tee_provider"), // "amd_sev" | "intel_sgx"
71+
72+
createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(),
73+
updatedAt: timestamp("updated_at", { withTimezone: true }).notNull().defaultNow(),
74+
suspendedAt: timestamp("suspended_at", { withTimezone: true }),
75+
bannedAt: timestamp("banned_at", { withTimezone: true }),
76+
},
77+
(t) => ({
78+
userIdIdx: index("nodes_user_id_idx").on(t.userId),
79+
statusIdx: index("nodes_status_idx").on(t.status),
80+
tierIdx: index("nodes_tier_idx").on(t.tier),
81+
regionIdx: index("nodes_region_idx").on(t.region),
82+
// Daemon queries active nodes by tier + region for routing decisions
83+
routingIdx: index("nodes_routing_idx").on(t.status, t.tier, t.region),
84+
}),
85+
);
86+
87+
// ─── Node Capabilities ────────────────────────────────────────────────────────
88+
// Hardware and software capabilities advertised by each node.
89+
// Updated on node registration and whenever the node reports a change.
90+
// The daemon uses this to match tasks with capable nodes.
91+
92+
export const nodeCapabilities = pgTable(
93+
"node_capabilities",
94+
{
95+
id: uuid("id").primaryKey().defaultRandom(),
96+
nodeId: uuid("node_id")
97+
.notNull()
98+
.unique()
99+
.references(() => nodes.id, { onDelete: "cascade" }),
100+
101+
// CPU
102+
cpuCores: integer("cpu_cores"),
103+
cpuModel: text("cpu_model"), // e.g. "Apple M4 Pro", "AMD EPYC 9654"
104+
architecture: text("architecture"), // "amd64" | "arm64"
105+
106+
// Memory + Storage
107+
ramGb: numeric("ram_gb", { precision: 8, scale: 2 }),
108+
storageGb: numeric("storage_gb", { precision: 10, scale: 2 }),
109+
110+
// GPU — null if no GPU present
111+
hasGpu: boolean("has_gpu").notNull().default(false),
112+
gpuModel: text("gpu_model"), // e.g. "NVIDIA H100", "Apple M4 Pro GPU"
113+
gpuVramGb: numeric("gpu_vram_gb", { precision: 8, scale: 2 }),
114+
gpuCount: integer("gpu_count"),
115+
116+
// OS
117+
osType: text("os_type"), // "linux" | "macos" | "windows"
118+
osVersion: text("os_version"),
119+
120+
// Concurrency — how many tasks this node can run simultaneously
121+
maxConcurrentTasks: integer("max_concurrent_tasks").notNull().default(1),
122+
123+
// Network
124+
networkBandwidthMbps: integer("network_bandwidth_mbps"),
125+
126+
// Model support — array of model IDs this node can serve
127+
// e.g. ["ollama/llama3.2", "claude-haiku-4-5"]
128+
// Anthropic/OpenAI models are available to all nodes with valid API keys.
129+
// Ollama models depend on what's pulled locally.
130+
supportedModels: jsonb("supported_models").notNull().default([]),
131+
132+
updatedAt: timestamp("updated_at", { withTimezone: true }).notNull().defaultNow(),
133+
},
134+
(t) => ({
135+
nodeIdIdx: uniqueIndex("node_capabilities_node_id_idx").on(t.nodeId),
136+
hasGpuIdx: index("node_capabilities_has_gpu_idx").on(t.hasGpu),
137+
}),
138+
);
139+
140+
// ─── Node Heartbeats ─────────────────────────────────────────────────────────
141+
// Rolling health log. Nodes ping every N seconds. The daemon marks a node
142+
// offline if no heartbeat is received within the timeout window.
143+
// Kept for short-term trend analysis — old rows are pruned by retention policy.
144+
145+
export const nodeHeartbeats = pgTable(
146+
"node_heartbeats",
147+
{
148+
id: uuid("id").primaryKey().defaultRandom(),
149+
nodeId: uuid("node_id")
150+
.notNull()
151+
.references(() => nodes.id, { onDelete: "cascade" }),
152+
153+
// Snapshot of resource utilisation at heartbeat time
154+
cpuUsagePct: numeric("cpu_usage_pct", { precision: 5, scale: 2 }),
155+
ramUsagePct: numeric("ram_usage_pct", { precision: 5, scale: 2 }),
156+
activeTaskCount: integer("active_task_count").notNull().default(0),
157+
158+
// Derived health signal — set by the heartbeat handler
159+
// "online" = healthy, "degraded" = high load or partial failure, "offline" = unreachable
160+
healthStatus: text("health_status").notNull().default("online"),
161+
162+
recordedAt: timestamp("recorded_at", { withTimezone: true }).notNull().defaultNow(),
163+
},
164+
(t) => ({
165+
nodeIdIdx: index("node_heartbeats_node_id_idx").on(t.nodeId),
166+
// Most recent heartbeat per node is the common query
167+
nodeRecordedIdx: index("node_heartbeats_node_recorded_idx").on(t.nodeId, t.recordedAt),
168+
}),
169+
);
170+
171+
export type Node = typeof nodes.$inferSelect;
172+
export type NewNode = typeof nodes.$inferInsert;
173+
export type NodeCapabilities = typeof nodeCapabilities.$inferSelect;
174+
export type NewNodeCapabilities = typeof nodeCapabilities.$inferInsert;
175+
export type NodeHeartbeat = typeof nodeHeartbeats.$inferSelect;
176+
export type NewNodeHeartbeat = typeof nodeHeartbeats.$inferInsert;

packages/db/src/schema/pg/relations.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import {
1313
reputationScores,
1414
walletAddresses,
1515
} from "./misc.js";
16+
import { nodeCapabilities, nodeHeartbeats, nodes } from "./nodes.js";
1617
import { notifications } from "./notifications.js";
1718
import { organizations } from "./organizations.js";
1819
import { plans } from "./plans.js";
@@ -41,6 +42,7 @@ export const usersRelations = relations(users, ({ one, many }) => ({
4142
wallets: many(walletAddresses),
4243
files: many(files),
4344
reputation: many(reputationScores),
45+
nodes: many(nodes),
4446
}));
4547

4648
export const userPasswordsRelations = relations(userPasswords, ({ one }) => ({
@@ -162,3 +164,22 @@ export const featureFlagOverridesRelations = relations(featureFlagOverrides, ({
162164
export const reputationScoresRelations = relations(reputationScores, ({ one }) => ({
163165
user: one(users, { fields: [reputationScores.userId], references: [users.id] }),
164166
}));
167+
168+
// ─── Nodes ────────────────────────────────────────────────────────────────────
169+
170+
export const nodesRelations = relations(nodes, ({ one, many }) => ({
171+
user: one(users, { fields: [nodes.userId], references: [users.id] }),
172+
capabilities: one(nodeCapabilities, {
173+
fields: [nodes.id],
174+
references: [nodeCapabilities.nodeId],
175+
}),
176+
heartbeats: many(nodeHeartbeats),
177+
}));
178+
179+
export const nodeCapabilitiesRelations = relations(nodeCapabilities, ({ one }) => ({
180+
node: one(nodes, { fields: [nodeCapabilities.nodeId], references: [nodes.id] }),
181+
}));
182+
183+
export const nodeHeartbeatsRelations = relations(nodeHeartbeats, ({ one }) => ({
184+
node: one(nodes, { fields: [nodeHeartbeats.nodeId], references: [nodes.id] }),
185+
}));

packages/jobs/src/dispatch.ts

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,3 +88,35 @@ export async function dispatchPruneSessions(): Promise<string> {
8888
export async function dispatchPruneTokens(): Promise<string> {
8989
return dispatch({ type: "maintenance.prune_tokens" });
9090
}
91+
92+
// ─── Webhook dispatch (published to Python worker) ───────────────────────────
93+
// These jobs are consumed by services/worker (Python), not the daemon.
94+
// Subject: maschina.jobs.worker.webhook_dispatch
95+
// NOTE: not routed through jobSubject() — Python worker expects a flat subject.
96+
97+
export async function dispatchWebhookJob(opts: {
98+
deliveryId: string;
99+
webhookId: string;
100+
event: string;
101+
payload: Record<string, unknown>;
102+
attempt?: number;
103+
}): Promise<void> {
104+
const js = await getJs();
105+
const subject = "maschina.jobs.worker.webhook_dispatch";
106+
107+
const envelope = {
108+
id: randomUUID(),
109+
timestamp: new Date().toISOString(),
110+
version: 1,
111+
subject,
112+
data: {
113+
delivery_id: opts.deliveryId,
114+
webhook_id: opts.webhookId,
115+
event: opts.event,
116+
payload: opts.payload,
117+
attempt: opts.attempt ?? 1,
118+
},
119+
};
120+
121+
await js.publish(subject, new TextEncoder().encode(JSON.stringify(envelope)));
122+
}

packages/webhooks/src/deliver.ts

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import type { WebhookEventType, WebhookPayload } from "./events.js";
2+
import { HEADER, signPayload } from "./sign.js";
3+
4+
export interface DeliveryResult {
5+
success: boolean;
6+
status: number | null;
7+
body: string | null;
8+
attempt: number;
9+
durationMs: number;
10+
}
11+
12+
// Exponential backoff delays (ms) per attempt: 10s, 30s, 90s, 5m, 15m
13+
const BACKOFF_MS = [10_000, 30_000, 90_000, 300_000, 900_000];
14+
export const MAX_ATTEMPTS = 5;
15+
16+
/**
17+
* Deliver a single webhook event to the given URL.
18+
* Does not retry — the caller (worker) controls the retry loop.
19+
*/
20+
export async function deliverOnce(
21+
url: string,
22+
secret: string,
23+
payload: WebhookPayload,
24+
attempt: number,
25+
): Promise<DeliveryResult> {
26+
const body = JSON.stringify(payload);
27+
const signature = signPayload(body, secret);
28+
const start = Date.now();
29+
30+
try {
31+
const res = await fetch(url, {
32+
method: "POST",
33+
headers: {
34+
"Content-Type": "application/json",
35+
"User-Agent": "Maschina-Webhook/1.0",
36+
[HEADER]: signature,
37+
"X-Maschina-Event": payload.type,
38+
"X-Maschina-Delivery": payload.id,
39+
},
40+
body,
41+
signal: AbortSignal.timeout(10_000), // 10s timeout per attempt
42+
});
43+
44+
const resBody = await res.text().catch(() => null);
45+
46+
return {
47+
success: res.ok,
48+
status: res.status,
49+
body: resBody?.slice(0, 500) ?? null, // cap stored response size
50+
attempt,
51+
durationMs: Date.now() - start,
52+
};
53+
} catch (err) {
54+
return {
55+
success: false,
56+
status: null,
57+
body: err instanceof Error ? err.message : "Unknown error",
58+
attempt,
59+
durationMs: Date.now() - start,
60+
};
61+
}
62+
}
63+
64+
/**
65+
* Returns the delay in ms before the next retry attempt.
66+
* Returns null if no more retries should be attempted.
67+
*/
68+
export function nextRetryDelay(attempt: number): number | null {
69+
if (attempt >= MAX_ATTEMPTS) return null;
70+
return BACKOFF_MS[attempt - 1] ?? BACKOFF_MS[BACKOFF_MS.length - 1];
71+
}
72+
73+
/**
74+
* Build a typed webhook payload ready to dispatch.
75+
*/
76+
export function buildPayload<T extends WebhookPayload>(
77+
type: WebhookEventType,
78+
data: T["data"],
79+
deliveryId: string,
80+
): WebhookPayload {
81+
return {
82+
id: deliveryId,
83+
type,
84+
created_at: new Date().toISOString(),
85+
api_version: "2026-03-13",
86+
data,
87+
} as WebhookPayload;
88+
}

0 commit comments

Comments
 (0)