Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,10 @@ jobs:
name: GPU Integration Tests
needs: build
runs-on: ubuntu-latest
if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
if: >-
github.event_name == 'pull_request' &&
github.event.pull_request.head.repo.full_name == github.repository &&
!contains(github.event.pull_request.labels.*.name, 'skip-gpu-tests')

steps:
- name: Dispatch GPU tests to lloyal-node
Expand Down
2 changes: 1 addition & 1 deletion packages/agents/src/Agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ export type ResultSource =
export interface FormatConfig {
format: number;
reasoningFormat: number;
thinkingForcedOpen: boolean;
generationPrompt: string;
parser: string;
grammar: string;
grammarLazy: boolean;
Expand Down
17 changes: 17 additions & 0 deletions packages/agents/src/Tool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,23 @@ export abstract class Tool<TArgs = Record<string, unknown>> {
*/
abstract execute(args: TArgs, context?: ToolContext): Operation<unknown>;

/**
* Optional reasoning probe prefilled after this tool's result settles.
*
* When set, the pool prefills this text into the agent's context after
* the tool result, before the lazy grammar resets. This nudges the model
* to reason in prose about the result before generating the next tool call.
*
* Return null/undefined to skip (default). Only applies to real tool
* dispatches — nudges and settle rejects are unaffected.
*
* @example
* ```typescript
* get probe() { return 'Wait, '; }
* ```
*/
get probe(): string | null { return null; }

/**
* OpenAI-compatible function tool schema
*
Expand Down
21 changes: 19 additions & 2 deletions packages/agents/src/agent-pool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ function* setupAgent(
fmt: {
format: fmt.format,
reasoningFormat: fmt.reasoningFormat,
thinkingForcedOpen: fmt.thinkingForcedOpen,
generationPrompt: fmt.generationPrompt,
parser: fmt.parser,
grammar: fmt.grammar,
grammarLazy: fmt.grammarLazy,
Expand Down Expand Up @@ -357,6 +357,7 @@ export function useAgentPool(opts: AgentPoolOptions): Operation<AgentPoolResult>
// in the next tick. DISPATCH awaits each tool to completion via
// scoped() + call() — no concurrent llama_decode possible.
const settledBuffer: SettledTool[] = [];
const dispatchedProbes = new Map<number, string>();
const agentById = new Map(agents.map(a => [a.id, a]));

let steps = 0;
Expand Down Expand Up @@ -397,7 +398,7 @@ export function useAgentPool(opts: AgentPoolOptions): Operation<AgentPoolResult>
if (isStop) {
const parsed = ctx.parseChatOutput(a.rawOutput, a.fmt.format, {
reasoningFormat: a.fmt.reasoningFormat,
thinkingForcedOpen: a.fmt.thinkingForcedOpen,
generationPrompt: a.fmt.generationPrompt,
parser: a.fmt.parser,
});

Expand Down Expand Up @@ -558,6 +559,20 @@ export function useAgentPool(opts: AgentPoolOptions): Operation<AgentPoolResult>
counters.warmPrefillCalls++;
counters.warmPrefillBranches += prefillPairs.length;

// Prefill per-tool reasoning probes for agents that just got real
// tool results. Each tool can optionally return a probe string via
// its `probe` getter — prefilled after the tool result to nudge the
// model into prose reasoning before the next tool call.
const probePairs: [Branch, number[]][] = [];
for (const a of settledAgents) {
const probe = dispatchedProbes.get(a.id);
if (probe) probePairs.push([a.branch, ctx.tokenizeSync(probe, false)]);
}
if (probePairs.length > 0) {
yield* call(() => store.prefill(probePairs));
}
dispatchedProbes.clear();

// Only NOW transition state + reset grammar
for (const a of settledAgents) {
a.transition('active');
Expand Down Expand Up @@ -648,6 +663,8 @@ export function useAgentPool(opts: AgentPoolOptions): Operation<AgentPoolResult>

const prefillTokens = buildToolResultDelta(ctx, resultStr, callId);
settledBuffer.push({ agentId: agent.id, prefillTokens, toolName: tc.name, callId });
const probe = tool?.probe;
if (probe) dispatchedProbes.set(agent.id, probe);

tw.write({
traceId: tw.nextId(), parentTraceId: dispatchTraceId, ts: performance.now(),
Expand Down
2 changes: 1 addition & 1 deletion packages/agents/test/Agent.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { Agent } from '../src/Agent';
import { createMockBranch } from './helpers/mock-branch';

const FMT = {
format: 0, reasoningFormat: 0, thinkingForcedOpen: false,
format: 0, reasoningFormat: 0, generationPrompt: '',
parser: '', grammar: '', grammarLazy: false, grammarTriggers: [],
};

Expand Down
2 changes: 1 addition & 1 deletion packages/agents/test/AgentPolicy.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { Agent } from '../src/Agent';
import { createMockBranch } from './helpers/mock-branch';

const FMT = {
format: 0, reasoningFormat: 0, thinkingForcedOpen: false,
format: 0, reasoningFormat: 0, generationPrompt: '',
parser: '', grammar: '', grammarLazy: false, grammarTriggers: [],
};

Expand Down
196 changes: 196 additions & 0 deletions packages/agents/test/agent-pool.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -674,3 +674,199 @@ describe('pressure thresholds propagation', () => {
}
});
});

// ── Group 7: Tool probe lifecycle hook ──────────────────────────

describe('tool probe lifecycle hook', () => {
/** Tool with a probe — returns "Wait, " after result settles */
class ProbeTool extends Tool<{ query: string }> {
readonly name = 'web_search';
readonly description = 'search with probe';
readonly parameters = { type: 'object' as const, properties: { query: { type: 'string' as const } } };
get probe() { return 'Wait, '; }
*execute(): Operation<unknown> { return { results: ['result'] }; }
}

/** Tool without a probe — default null */
class NoProbeTool extends Tool<{ query: string }> {
readonly name = 'web_search';
readonly description = 'search without probe';
readonly parameters = { type: 'object' as const, properties: { query: { type: 'string' as const } } };
*execute(): Operation<unknown> { return { results: ['result'] }; }
}

function toolCallPolicy(): AgentPolicy {
return stubPolicy({
shouldExit: () => false,
onProduced: (_a, parsed) => {
if (parsed.toolCalls.length > 0) return { type: 'tool_call', tc: parsed.toolCalls[0] };
return { type: 'idle', reason: 'free_text_stop' };
},
onSettleReject: () => ({ type: 'idle', reason: 'pressure_settle_reject' }),
});
}

it('7a: tool with probe → extra prefill after tool result', async () => {
const probeTool = new ProbeTool();
const toolMap = new Map<string, Tool>([['web_search', probeTool]]);

const { ctx, store, root } = createMockSdk({ nCtx: 16384, cellsUsed: 1000 });

// Track prefill calls on the single ctx
let prefillCallCount = 0;
const origPrefill = ctx._storePrefill.bind(ctx);
ctx._storePrefill = async (handles: number[], tokenArrays: number[][]) => {
prefillCallCount++;
return origPrefill(handles, tokenArrays);
};

// Wire token queues
let forkCount = 0;
const branchForkIndex = new Map<number, number>();
const branchSampleCount = new Map<number, number>();
const origFork = ctx._branchFork.bind(ctx);
ctx._branchFork = (parentHandle: number): number => {
const handle = origFork(parentHandle);
branchForkIndex.set(handle, forkCount++);
branchSampleCount.set(handle, 0);
return handle;
};
const queues = [[1, STOP, STOP]];
ctx._branchSample = (handle: number): number => {
const fi = branchForkIndex.get(handle) ?? -1;
const queue = fi >= 0 ? (queues[fi] ?? [STOP]) : [STOP];
const idx = branchSampleCount.get(handle) ?? 0;
branchSampleCount.set(handle, idx + 1);
return idx < queue.length ? queue[idx] : STOP;
};
ctx.parseChatOutput = (raw: string) => {
if (!raw || raw === '') return { content: '', reasoningContent: '', toolCalls: [] };
return { content: '', reasoningContent: '', toolCalls: [{ name: 'web_search', arguments: '{"query":"test"}', id: 'c1' }] };
};

const traceWriter = new CapturingTraceWriter();
await root.prefill(ctx.tokenizeSync('system'));
prefillCallCount = 0; // reset after root prefill

await run(function* () {
yield* Ctx.set(ctx as any);
yield* Store.set(store);
const events: Channel<AgentEvent, void> = createChannel();
yield* Events.set(events as any);
yield* Trace.set(traceWriter);
yield* spawn(function* () { for (const ev of yield* each(events)) { yield* each.next(); } });

return yield* scoped(function* () {
return yield* useAgentPool({
tasks: [{ systemPrompt: 'Agent', content: 'Task', tools: JSON.stringify([probeTool.schema]), parent: root, seed: 0 }],
tools: toolMap,
policy: toolCallPolicy(),
maxTurns: 100,
});
});
});

// Prefill calls: 1 (agent suffix) + 1 (tool result) + 1 (probe) = 3 minimum
expect(prefillCallCount).toBeGreaterThanOrEqual(3);
});

it('7b: tool without probe → no extra prefill (noop)', async () => {
const noProbeTool = new NoProbeTool();
const toolMap = new Map<string, Tool>([['web_search', noProbeTool]]);

const { ctx, store, root } = createMockSdk({ nCtx: 16384, cellsUsed: 1000 });

let prefillCallCount = 0;
const origPrefill = ctx._storePrefill.bind(ctx);
ctx._storePrefill = async (handles: number[], tokenArrays: number[][]) => {
prefillCallCount++;
return origPrefill(handles, tokenArrays);
};

let forkCount = 0;
const branchForkIndex = new Map<number, number>();
const branchSampleCount = new Map<number, number>();
const origFork = ctx._branchFork.bind(ctx);
ctx._branchFork = (parentHandle: number): number => {
const handle = origFork(parentHandle);
branchForkIndex.set(handle, forkCount++);
branchSampleCount.set(handle, 0);
return handle;
};
const queues = [[1, STOP, STOP]];
ctx._branchSample = (handle: number): number => {
const fi = branchForkIndex.get(handle) ?? -1;
const queue = fi >= 0 ? (queues[fi] ?? [STOP]) : [STOP];
const idx = branchSampleCount.get(handle) ?? 0;
branchSampleCount.set(handle, idx + 1);
return idx < queue.length ? queue[idx] : STOP;
};
ctx.parseChatOutput = (raw: string) => {
if (!raw || raw === '') return { content: '', reasoningContent: '', toolCalls: [] };
return { content: '', reasoningContent: '', toolCalls: [{ name: 'web_search', arguments: '{"query":"test"}', id: 'c1' }] };
};

const traceWriter = new CapturingTraceWriter();
await root.prefill(ctx.tokenizeSync('system'));
prefillCallCount = 0;

await run(function* () {
yield* Ctx.set(ctx as any);
yield* Store.set(store);
const events: Channel<AgentEvent, void> = createChannel();
yield* Events.set(events as any);
yield* Trace.set(traceWriter);
yield* spawn(function* () { for (const ev of yield* each(events)) { yield* each.next(); } });

return yield* scoped(function* () {
return yield* useAgentPool({
tasks: [{ systemPrompt: 'Agent', content: 'Task', tools: JSON.stringify([noProbeTool.schema]), parent: root, seed: 0 }],
tools: toolMap,
policy: toolCallPolicy(),
maxTurns: 100,
});
});
});

// Prefill calls: 1 (agent suffix) + 1 (tool result) = 2 — NO probe prefill
expect(prefillCallCount).toBe(2);
});

it('7c: default Tool.probe returns null', () => {
const tool = new NoProbeTool();
expect(tool.probe).toBeNull();
});

it('7d: probe does not fire on nudge', async () => {
// Tool has a probe, but the agent gets nudged — probe should NOT fire
const probeTool = new ProbeTool();
const toolMap = new Map<string, Tool>([['web_search', probeTool]]);

const { result, ctx } = await runPool({
forkTokenQueues: [[1, 2, STOP, 3, STOP]],
parseChatOutputFn: (raw) => {
if (!raw || raw === '') return { content: '', reasoningContent: '', toolCalls: [] };
return { content: '', reasoningContent: '', toolCalls: [{ name: 'web_search', arguments: '{}', id: 'c1' }] };
},
tools: toolMap,
policy: (() => {
let nudged = false;
return stubPolicy({
shouldExit: () => false,
onProduced: (_a, parsed) => {
if (parsed.toolCalls.length > 0 && !nudged) {
nudged = true;
return { type: 'nudge', message: 'Report now.' };
}
return { type: 'idle', reason: 'free_text_stop' };
},
onSettleReject: () => ({ type: 'idle', reason: 'pressure_settle_reject' }),
});
})(),
});

// Agent was nudged, not dispatched — probe tool exists but probe should not fire
// Agent should complete without errors
expect(result.agents[0]).toBeDefined();
});
});
4 changes: 2 additions & 2 deletions packages/agents/test/helpers/effection-harness.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ export function createMockSessionContext(opts?: {
tokenizeSync: (text: string) => Array.from({ length: Math.ceil(text.length / 4) }, (_, i) => i + 1),
formatChat: async (msgs: string, opts?: Record<string, unknown>) => ({
prompt: `<formatted>${msgs}</formatted>`,
format: 1, reasoningFormat: 0, thinkingForcedOpen: false,
format: 1, reasoningFormat: 0, generationPrompt: '',
parser: 'default', grammar: '', grammarLazy: true,
grammarTriggers: [],
}),
formatChatSync: (msgs: string) => ({
prompt: `<formatted>${msgs}</formatted>`,
format: 1, reasoningFormat: 0, thinkingForcedOpen: false,
format: 1, reasoningFormat: 0, generationPrompt: '',
parser: 'default', grammar: '', grammarLazy: true,
grammarTriggers: [],
}),
Expand Down
10 changes: 5 additions & 5 deletions packages/agents/test/spawn-agents.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ describe('Agent.task', () => {
const branch = createMockBranch();
const a = new Agent({
id: 1, parentId: 0, branch: branch as any,
fmt: { format: 0, reasoningFormat: 0, thinkingForcedOpen: false, parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
fmt: { format: 0, reasoningFormat: 0, generationPrompt: '', parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
task: 'investigate speculative decoding on M3',
});
expect(a.task).toBe('investigate speculative decoding on M3');
Expand All @@ -360,7 +360,7 @@ describe('Agent.task', () => {
const branch = createMockBranch();
const a = new Agent({
id: 1, parentId: 0, branch: branch as any,
fmt: { format: 0, reasoningFormat: 0, thinkingForcedOpen: false, parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
fmt: { format: 0, reasoningFormat: 0, generationPrompt: '', parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
});
expect(a.task).toBe('');
});
Expand All @@ -376,7 +376,7 @@ describe('Explore/exploit decoupled from lifecycle', () => {
const branch = createMockBranch();
const a = new Agent({
id: 1, parentId: 0, branch: branch as any,
fmt: { format: 0, reasoningFormat: 0, thinkingForcedOpen: false, parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
fmt: { format: 0, reasoningFormat: 0, generationPrompt: '', parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
});
a.transition('active');
a.incrementToolCalls();
Expand Down Expand Up @@ -408,7 +408,7 @@ describe('Explore/exploit decoupled from lifecycle', () => {
const branch = createMockBranch();
const a = new Agent({
id: 1, parentId: 0, branch: branch as any,
fmt: { format: 0, reasoningFormat: 0, thinkingForcedOpen: false, parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
fmt: { format: 0, reasoningFormat: 0, generationPrompt: '', parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
});
a.transition('active');
a.incrementToolCalls();
Expand Down Expand Up @@ -438,7 +438,7 @@ describe('Explore/exploit decoupled from lifecycle', () => {
const policy = new DefaultAgentPolicy({ exploreThreshold: 40 });
const a = new Agent({
id: 1, parentId: 0, branch: createMockBranch() as any,
fmt: { format: 0, reasoningFormat: 0, thinkingForcedOpen: false, parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
fmt: { format: 0, reasoningFormat: 0, generationPrompt: '', parser: '', grammar: '', grammarLazy: false, grammarTriggers: [] },
});

const highPressure = {
Expand Down
Loading
Loading