lloyal-ai · lloyal-research · Apr 2, 2026 · Apr 1, 2026 · Apr 2, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -55,7 +55,10 @@ jobs:
     name: GPU Integration Tests
     needs: build
     runs-on: ubuntu-latest
-    if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
+    if: >-
+      github.event_name == 'pull_request' &&
+      github.event.pull_request.head.repo.full_name == github.repository &&
+      !contains(github.event.pull_request.labels.*.name, 'skip-gpu-tests')
 
     steps:
       - name: Dispatch GPU tests to lloyal-node

diff --git a/packages/agents/src/Agent.ts b/packages/agents/src/Agent.ts
@@ -40,7 +40,7 @@ export type ResultSource =
 export interface FormatConfig {
   format: number;
   reasoningFormat: number;
-  thinkingForcedOpen: boolean;
+  generationPrompt: string;
   parser: string;
   grammar: string;
   grammarLazy: boolean;

diff --git a/packages/agents/src/Tool.ts b/packages/agents/src/Tool.ts
@@ -66,6 +66,23 @@ export abstract class Tool<TArgs = Record<string, unknown>> {
    */
   abstract execute(args: TArgs, context?: ToolContext): Operation<unknown>;
 
+  /**
+   * Optional reasoning probe prefilled after this tool's result settles.
+   *
+   * When set, the pool prefills this text into the agent's context after
+   * the tool result, before the lazy grammar resets. This nudges the model
+   * to reason in prose about the result before generating the next tool call.
+   *
+   * Return null/undefined to skip (default). Only applies to real tool
+   * dispatches — nudges and settle rejects are unaffected.
+   *
+   * @example
+   * ```typescript
+   * get probe() { return 'Wait, '; }
+   * ```
+   */
+  get probe(): string | null { return null; }
+
   /**
    * OpenAI-compatible function tool schema
    *

diff --git a/packages/agents/src/agent-pool.ts b/packages/agents/src/agent-pool.ts
@@ -165,7 +165,7 @@ function* setupAgent(
     fmt: {
       format: fmt.format,
       reasoningFormat: fmt.reasoningFormat,
-      thinkingForcedOpen: fmt.thinkingForcedOpen,
+      generationPrompt: fmt.generationPrompt,
       parser: fmt.parser,
       grammar: fmt.grammar,
       grammarLazy: fmt.grammarLazy,
@@ -357,6 +357,7 @@ export function useAgentPool(opts: AgentPoolOptions): Operation<AgentPoolResult>
     // in the next tick. DISPATCH awaits each tool to completion via
     // scoped() + call() — no concurrent llama_decode possible.
     const settledBuffer: SettledTool[] = [];
+    const dispatchedProbes = new Map<number, string>();
     const agentById = new Map(agents.map(a => [a.id, a]));
 
     let steps = 0;
@@ -397,7 +398,7 @@ export function useAgentPool(opts: AgentPoolOptions): Operation<AgentPoolResult>
         if (isStop) {
           const parsed = ctx.parseChatOutput(a.rawOutput, a.fmt.format, {
             reasoningFormat: a.fmt.reasoningFormat,
-            thinkingForcedOpen: a.fmt.thinkingForcedOpen,
+            generationPrompt: a.fmt.generationPrompt,
             parser: a.fmt.parser,
           });
 
@@ -558,6 +559,20 @@ export function useAgentPool(opts: AgentPoolOptions): Operation<AgentPoolResult>
           counters.warmPrefillCalls++;
           counters.warmPrefillBranches += prefillPairs.length;
 
+          // Prefill per-tool reasoning probes for agents that just got real
+          // tool results. Each tool can optionally return a probe string via
+          // its `probe` getter — prefilled after the tool result to nudge the
+          // model into prose reasoning before the next tool call.
+          const probePairs: [Branch, number[]][] = [];
+          for (const a of settledAgents) {
+            const probe = dispatchedProbes.get(a.id);
+            if (probe) probePairs.push([a.branch, ctx.tokenizeSync(probe, false)]);
+          }
+          if (probePairs.length > 0) {
+            yield* call(() => store.prefill(probePairs));
+          }
+          dispatchedProbes.clear();
+
           // Only NOW transition state + reset grammar
           for (const a of settledAgents) {
             a.transition('active');
@@ -648,6 +663,8 @@ export function useAgentPool(opts: AgentPoolOptions): Operation<AgentPoolResult>
 
           const prefillTokens = buildToolResultDelta(ctx, resultStr, callId);
           settledBuffer.push({ agentId: agent.id, prefillTokens, toolName: tc.name, callId });
+          const probe = tool?.probe;
+          if (probe) dispatchedProbes.set(agent.id, probe);
 
           tw.write({
             traceId: tw.nextId(), parentTraceId: dispatchTraceId, ts: performance.now(),

diff --git a/packages/agents/test/Agent.test.ts b/packages/agents/test/Agent.test.ts
@@ -3,7 +3,7 @@ import { Agent } from '../src/Agent';
 import { createMockBranch } from './helpers/mock-branch';
 
 const FMT = {
-  format: 0, reasoningFormat: 0, thinkingForcedOpen: false,
+  format: 0, reasoningFormat: 0, generationPrompt: '',
   parser: '', grammar: '', grammarLazy: false, grammarTriggers: [],
 };
 

diff --git a/packages/agents/test/AgentPolicy.test.ts b/packages/agents/test/AgentPolicy.test.ts
@@ -5,7 +5,7 @@ import { Agent } from '../src/Agent';
 import { createMockBranch } from './helpers/mock-branch';
 
 const FMT = {
-  format: 0, reasoningFormat: 0, thinkingForcedOpen: false,
+  format: 0, reasoningFormat: 0, generationPrompt: '',
   parser: '', grammar: '', grammarLazy: false, grammarTriggers: [],
 };
 

diff --git a/packages/agents/test/agent-pool.test.ts b/packages/agents/test/agent-pool.test.ts
@@ -674,3 +674,199 @@ describe('pressure thresholds propagation', () => {
     }
   });
 });
+
+// ── Group 7: Tool probe lifecycle hook ──────────────────────────
+
+describe('tool probe lifecycle hook', () => {
+  /** Tool with a probe — returns "Wait, " after result settles */
+  class ProbeTool extends Tool<{ query: string }> {
+    readonly name = 'web_search';
+    readonly description = 'search with probe';
+    readonly parameters = { type: 'object' as const, properties: { query: { type: 'string' as const } } };
+    get probe() { return 'Wait, '; }
+    *execute(): Operation<unknown> { return { results: ['result'] }; }
+  }
+
+  /** Tool without a probe — default null */
+  class NoProbeTool extends Tool<{ query: string }> {
+    readonly name = 'web_search';
+    readonly description = 'search without probe';
+    readonly parameters = { type: 'object' as const, properties: { query: { type: 'string' as const } } };
+    *execute(): Operation<unknown> { return { results: ['result'] }; }
+  }
+
+  function toolCallPolicy(): AgentPolicy {
+    return stubPolicy({
+      shouldExit: () => false,
+      onProduced: (_a, parsed) => {
+        if (parsed.toolCalls.length > 0) return { type: 'tool_call', tc: parsed.toolCalls[0] };
+        return { type: 'idle', reason: 'free_text_stop' };
+      },
+      onSettleReject: () => ({ type: 'idle', reason: 'pressure_settle_reject' }),
+    });
+  }
+
+  it('7a: tool with probe → extra prefill after tool result', async () => {
+    const probeTool = new ProbeTool();
+    const toolMap = new Map<string, Tool>([['web_search', probeTool]]);
+
+    const { ctx, store, root } = createMockSdk({ nCtx: 16384, cellsUsed: 1000 });
+
+    // Track prefill calls on the single ctx
+    let prefillCallCount = 0;
+    const origPrefill = ctx._storePrefill.bind(ctx);
+    ctx._storePrefill = async (handles: number[], tokenArrays: number[][]) => {
+      prefillCallCount++;
+      return origPrefill(handles, tokenArrays);
+    };
+
+    // Wire token queues
+    let forkCount = 0;
+    const branchForkIndex = new Map<number, number>();
+    const branchSampleCount = new Map<number, number>();
+    const origFork = ctx._branchFork.bind(ctx);
+    ctx._branchFork = (parentHandle: number): number => {
+      const handle = origFork(parentHandle);
+      branchForkIndex.set(handle, forkCount++);
+      branchSampleCount.set(handle, 0);
+      return handle;
+    };
+    const queues = [[1, STOP, STOP]];
+    ctx._branchSample = (handle: number): number => {
+      const fi = branchForkIndex.get(handle) ?? -1;
+      const queue = fi >= 0 ? (queues[fi] ?? [STOP]) : [STOP];
+      const idx = branchSampleCount.get(handle) ?? 0;
+      branchSampleCount.set(handle, idx + 1);
+      return idx < queue.length ? queue[idx] : STOP;
+    };
+    ctx.parseChatOutput = (raw: string) => {
+      if (!raw || raw === '') return { content: '', reasoningContent: '', toolCalls: [] };
+      return { content: '', reasoningContent: '', toolCalls: [{ name: 'web_search', arguments: '{"query":"test"}', id: 'c1' }] };
+    };
+
+    const traceWriter = new CapturingTraceWriter();
+    await root.prefill(ctx.tokenizeSync('system'));
+    prefillCallCount = 0; // reset after root prefill
+
+    await run(function* () {
+      yield* Ctx.set(ctx as any);
+      yield* Store.set(store);
+      const events: Channel<AgentEvent, void> = createChannel();
+      yield* Events.set(events as any);
+      yield* Trace.set(traceWriter);
+      yield* spawn(function* () { for (const ev of yield* each(events)) { yield* each.next(); } });
+
+      return yield* scoped(function* () {
+        return yield* useAgentPool({
+          tasks: [{ systemPrompt: 'Agent', content: 'Task', tools: JSON.stringify([probeTool.schema]), parent: root, seed: 0 }],
+          tools: toolMap,
+          policy: toolCallPolicy(),
+          maxTurns: 100,
+        });
+      });
+    });
+
+    // Prefill calls: 1 (agent suffix) + 1 (tool result) + 1 (probe) = 3 minimum
+    expect(prefillCallCount).toBeGreaterThanOrEqual(3);
+  });
+
+  it('7b: tool without probe → no extra prefill (noop)', async () => {
+    const noProbeTool = new NoProbeTool();
+    const toolMap = new Map<string, Tool>([['web_search', noProbeTool]]);
+
+    const { ctx, store, root } = createMockSdk({ nCtx: 16384, cellsUsed: 1000 });
+
+    let prefillCallCount = 0;
+    const origPrefill = ctx._storePrefill.bind(ctx);
+    ctx._storePrefill = async (handles: number[], tokenArrays: number[][]) => {
+      prefillCallCount++;
+      return origPrefill(handles, tokenArrays);
+    };
+
+    let forkCount = 0;
+    const branchForkIndex = new Map<number, number>();
+    const branchSampleCount = new Map<number, number>();
+    const origFork = ctx._branchFork.bind(ctx);
+    ctx._branchFork = (parentHandle: number): number => {
+      const handle = origFork(parentHandle);
+      branchForkIndex.set(handle, forkCount++);
+      branchSampleCount.set(handle, 0);
+      return handle;
+    };
+    const queues = [[1, STOP, STOP]];
+    ctx._branchSample = (handle: number): number => {
+      const fi = branchForkIndex.get(handle) ?? -1;
+      const queue = fi >= 0 ? (queues[fi] ?? [STOP]) : [STOP];
+      const idx = branchSampleCount.get(handle) ?? 0;
+      branchSampleCount.set(handle, idx + 1);
+      return idx < queue.length ? queue[idx] : STOP;
+    };
+    ctx.parseChatOutput = (raw: string) => {
+      if (!raw || raw === '') return { content: '', reasoningContent: '', toolCalls: [] };
+      return { content: '', reasoningContent: '', toolCalls: [{ name: 'web_search', arguments: '{"query":"test"}', id: 'c1' }] };
+    };
+
+    const traceWriter = new CapturingTraceWriter();
+    await root.prefill(ctx.tokenizeSync('system'));
+    prefillCallCount = 0;
+
+    await run(function* () {
+      yield* Ctx.set(ctx as any);
+      yield* Store.set(store);
+      const events: Channel<AgentEvent, void> = createChannel();
+      yield* Events.set(events as any);
+      yield* Trace.set(traceWriter);
+      yield* spawn(function* () { for (const ev of yield* each(events)) { yield* each.next(); } });
+
+      return yield* scoped(function* () {
+        return yield* useAgentPool({
+          tasks: [{ systemPrompt: 'Agent', content: 'Task', tools: JSON.stringify([noProbeTool.schema]), parent: root, seed: 0 }],
+          tools: toolMap,
+          policy: toolCallPolicy(),
+          maxTurns: 100,
+        });
+      });
+    });
+
+    // Prefill calls: 1 (agent suffix) + 1 (tool result) = 2 — NO probe prefill
+    expect(prefillCallCount).toBe(2);
+  });
+
+  it('7c: default Tool.probe returns null', () => {
+    const tool = new NoProbeTool();
+    expect(tool.probe).toBeNull();
+  });
+
+  it('7d: probe does not fire on nudge', async () => {
+    // Tool has a probe, but the agent gets nudged — probe should NOT fire
+    const probeTool = new ProbeTool();
+    const toolMap = new Map<string, Tool>([['web_search', probeTool]]);
+
+    const { result, ctx } = await runPool({
+      forkTokenQueues: [[1, 2, STOP, 3, STOP]],
+      parseChatOutputFn: (raw) => {
+        if (!raw || raw === '') return { content: '', reasoningContent: '', toolCalls: [] };
+        return { content: '', reasoningContent: '', toolCalls: [{ name: 'web_search', arguments: '{}', id: 'c1' }] };
+      },
+      tools: toolMap,
+      policy: (() => {
+        let nudged = false;
+        return stubPolicy({
+          shouldExit: () => false,
+          onProduced: (_a, parsed) => {
+            if (parsed.toolCalls.length > 0 && !nudged) {
+              nudged = true;
+              return { type: 'nudge', message: 'Report now.' };
+            }
+            return { type: 'idle', reason: 'free_text_stop' };
+          },
+          onSettleReject: () => ({ type: 'idle', reason: 'pressure_settle_reject' }),
+        });
+      })(),
+    });
+
+    // Agent was nudged, not dispatched — probe tool exists but probe should not fire
+    // Agent should complete without errors
+    expect(result.agents[0]).toBeDefined();
+  });
+});
diff --git a/packages/agents/test/helpers/effection-harness.ts b/packages/agents/test/helpers/effection-harness.ts
@@ -21,13 +21,13 @@ export function createMockSessionContext(opts?: {
     tokenizeSync: (text: string) => Array.from({ length: Math.ceil(text.length / 4) }, (_, i) => i + 1),
     formatChat: async (msgs: string, opts?: Record<string, unknown>) => ({
       prompt: `<formatted>${msgs}</formatted>`,
-      format: 1, reasoningFormat: 0, thinkingForcedOpen: false,
+      format: 1, reasoningFormat: 0, generationPrompt: '',
       parser: 'default', grammar: '', grammarLazy: true,
       grammarTriggers: [],
     }),
     formatChatSync: (msgs: string) => ({
       prompt: `<formatted>${msgs}</formatted>`,
-      format: 1, reasoningFormat: 0, thinkingForcedOpen: false,
+      format: 1, reasoningFormat: 0, generationPrompt: '',
       parser: 'default', grammar: '', grammarLazy: true,
       grammarTriggers: [],
     }),

diff --git a/packages/agents/test/spawn-agents.test.ts b/packages/agents/test/spawn-agents.test.ts
@@ -350,7 +350,7 @@ describe('Agent.task', () => {
     const branch = createMockBranch();
     const a = new Agent({
       id: 1, parentId: 0, branch: branch as any,
-      fmt: { format: 0, reasoningFormat: 0, thinkingForcedOpen: false, parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
+      fmt: { format: 0, reasoningFormat: 0, generationPrompt: '', parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
       task: 'investigate speculative decoding on M3',
     });
     expect(a.task).toBe('investigate speculative decoding on M3');
@@ -360,7 +360,7 @@ describe('Agent.task', () => {
     const branch = createMockBranch();
     const a = new Agent({
       id: 1, parentId: 0, branch: branch as any,
-      fmt: { format: 0, reasoningFormat: 0, thinkingForcedOpen: false, parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
+      fmt: { format: 0, reasoningFormat: 0, generationPrompt: '', parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
     });
     expect(a.task).toBe('');
   });
@@ -376,7 +376,7 @@ describe('Explore/exploit decoupled from lifecycle', () => {
     const branch = createMockBranch();
     const a = new Agent({
       id: 1, parentId: 0, branch: branch as any,
-      fmt: { format: 0, reasoningFormat: 0, thinkingForcedOpen: false, parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
+      fmt: { format: 0, reasoningFormat: 0, generationPrompt: '', parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
     });
     a.transition('active');
     a.incrementToolCalls();
@@ -408,7 +408,7 @@ describe('Explore/exploit decoupled from lifecycle', () => {
     const branch = createMockBranch();
     const a = new Agent({
       id: 1, parentId: 0, branch: branch as any,
-      fmt: { format: 0, reasoningFormat: 0, thinkingForcedOpen: false, parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
+      fmt: { format: 0, reasoningFormat: 0, generationPrompt: '', parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
     });
     a.transition('active');
     a.incrementToolCalls();
@@ -438,7 +438,7 @@ describe('Explore/exploit decoupled from lifecycle', () => {
     const policy = new DefaultAgentPolicy({ exploreThreshold: 40 });
     const a = new Agent({
       id: 1, parentId: 0, branch: createMockBranch() as any,
-      fmt: { format: 0, reasoningFormat: 0, thinkingForcedOpen: false, parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
+      fmt: { format: 0, reasoningFormat: 0, generationPrompt: '', parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
     });
 
     const highPressure = {