Skip to content

Commit c2fec7d

Browse files
KoderFPV and claude committed
feat: Add product details agent for single product queries
Implement a dedicated productNode to handle product detail queries:
- Support position-based references ("the first one", "#2")
- Support name-based references ("Gaming Laptop Pro X1")
- Add IProductAttribute interface for structured specifications
- Track lastSearchResults in graph state between conversation turns
- Extract specs from description as fallback when attributes missing

Changes:
- domain/product.ts: Add IProductAttribute and attributes field
- agents/graph/state.ts: Add ISearchResult and lastSearchResults
- agents/graph/nodes/productNode.ts: New node for product details
- agents/prompts/productPrompts.ts: LLM prompts for reference extraction
- models/products/productsModel.ts: Add findProductByName function
- Updated routing, translations, and tests

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 4ba7812 commit c2fec7d

File tree

19 files changed

+1063
-41
lines changed

19 files changed

+1063
-41
lines changed

agents/__tests__/evaluation/conversationRunner.ts

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { executeChatGraphWithStream, IStreamCallback } from '@/agents/graph/chatGraph';
2+
import { ISearchResult } from '@/agents/graph/state';
23
import { IConversationTurn } from './evaluator';
34

45
export interface IConversationScenario {
@@ -32,22 +33,26 @@ export const runConversation = async (
3233
const callbacks = createNoopCallbacks();
3334

3435
const messages: Array<{ role: string; content: string }> = [];
36+
let lastSearchResults: ISearchResult | null = null;
3537

3638
for (const turn of scenario.turns) {
3739
messages.push({ role: 'user', content: turn.userMessage });
3840
conversation.push({ role: 'user', content: turn.userMessage });
3941

40-
const response = await executeChatGraphWithStream(
42+
const result = await executeChatGraphWithStream(
4143
sessionId,
4244
scenario.locale,
4345
messages,
44-
callbacks
46+
callbacks,
47+
lastSearchResults
4548
);
4649

47-
messages.push({ role: 'assistant', content: response });
48-
conversation.push({ role: 'assistant', content: response });
50+
lastSearchResults = result.lastSearchResults;
4951

50-
if (turn.validateResponse && !turn.validateResponse(response)) {
52+
messages.push({ role: 'assistant', content: result.response });
53+
conversation.push({ role: 'assistant', content: result.response });
54+
55+
if (turn.validateResponse && !turn.validateResponse(result.response)) {
5156
return {
5257
scenario,
5358
conversation,

agents/__tests__/evaluation/evaluator.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,3 +164,26 @@ export const defaultChatCriteria: IEvaluationCriteria[] = [
164164
weight: 1,
165165
},
166166
];
167+
168+
export const defaultProductDetailsCriteria: IEvaluationCriteria[] = [
169+
{
170+
name: 'Accuracy',
171+
description: 'Does the assistant provide accurate product details?',
172+
weight: 3,
173+
},
174+
{
175+
name: 'Completeness',
176+
description: 'Does the response include relevant specifications?',
177+
weight: 2,
178+
},
179+
{
180+
name: 'Reference Understanding',
181+
description: 'Does the assistant correctly identify which product the user is asking about?',
182+
weight: 3,
183+
},
184+
{
185+
name: 'Natural Language',
186+
description: 'Is the response natural and easy to understand?',
187+
weight: 1,
188+
},
189+
];
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
2+
import {
3+
evaluateConversation,
4+
defaultProductDetailsCriteria,
5+
IEvaluationResult,
6+
IConversationTurn,
7+
} from './evaluator';
8+
import { runConversation, IConversationScenario } from './conversationRunner';
9+
import { clearLastRunDirectory, saveFailedTest } from './testResultsReporter';
10+
import { setupTestProducts, teardownTestProducts } from './testFixtures';
11+
12+
const MINIMUM_PASSING_SCORE = 3.5;
13+
14+
beforeAll(async () => {
15+
clearLastRunDirectory();
16+
await setupTestProducts();
17+
}, 60000);
18+
19+
afterAll(async () => {
20+
await teardownTestProducts();
21+
}, 30000);
22+
23+
const productDetailsScenarios: IConversationScenario[] = [
24+
{
25+
name: 'Product details by position',
26+
locale: 'en',
27+
turns: [
28+
{ userMessage: 'Show me laptops' },
29+
{ userMessage: 'What are the specs of the first one?' },
30+
],
31+
expectedBehavior:
32+
'After showing laptops, the assistant should provide detailed specifications of the first laptop including RAM, processor, storage from attributes or description.',
33+
},
34+
{
35+
name: 'Product details by name',
36+
locale: 'en',
37+
turns: [{ userMessage: 'Tell me about Gaming Laptop Pro X1' }],
38+
expectedBehavior:
39+
'The assistant should provide detailed information about the Gaming Laptop Pro X1 including specifications like RAM, GPU, and storage.',
40+
},
41+
{
42+
name: 'Product details in Polish',
43+
locale: 'pl',
44+
turns: [
45+
{ userMessage: 'Pokaż laptopy' },
46+
{ userMessage: 'Jaki procesor ma pierwszy?' },
47+
],
48+
expectedBehavior:
49+
'The assistant should provide processor details of the first laptop in Polish language.',
50+
},
51+
{
52+
name: 'Non-existent product',
53+
locale: 'en',
54+
turns: [{ userMessage: 'Tell me about SuperPhone 3000' }],
55+
expectedBehavior:
56+
'The assistant should indicate that the product was not found or ask for more information.',
57+
},
58+
{
59+
name: 'Product details by partial name',
60+
locale: 'en',
61+
turns: [{ userMessage: 'What specs does the iPhone have?' }],
62+
expectedBehavior:
63+
'The assistant should provide details about the iPhone 15 Pro Max including processor and storage.',
64+
},
65+
];
66+
67+
describe('Product Details E2E Evaluation', () => {
68+
describe.each(productDetailsScenarios)('Scenario: $name', (scenario) => {
69+
let evaluationResult: IEvaluationResult;
70+
let conversation: IConversationTurn[];
71+
72+
beforeAll(async () => {
73+
const conversationResult = await runConversation(scenario);
74+
conversation = conversationResult.conversation;
75+
76+
console.log(`\n=== Conversation: ${scenario.name} ===`);
77+
conversation.forEach((turn) => {
78+
console.log(`${turn.role.toUpperCase()}: ${turn.content}`);
79+
});
80+
81+
expect(conversationResult.success).toBe(true);
82+
83+
evaluationResult = await evaluateConversation(
84+
conversation,
85+
defaultProductDetailsCriteria,
86+
scenario.expectedBehavior
87+
);
88+
89+
console.log(`\nEvaluation Score: ${evaluationResult.score}`);
90+
console.log(`Reasoning: ${evaluationResult.reasoning}\n`);
91+
92+
if (evaluationResult.score < MINIMUM_PASSING_SCORE) {
93+
saveFailedTest(scenario, conversation, evaluationResult);
94+
}
95+
}, 180000);
96+
97+
it('should pass LLM evaluation with score >= 3.5', () => {
98+
expect(evaluationResult.score).toBeGreaterThanOrEqual(MINIMUM_PASSING_SCORE);
99+
expect(evaluationResult.passed).toBe(true);
100+
});
101+
102+
it('should have valid reasoning', () => {
103+
expect(evaluationResult.reasoning).toBeTruthy();
104+
expect(evaluationResult.reasoning.length).toBeGreaterThan(10);
105+
});
106+
});
107+
});
108+
109+
const MULTI_TURN_COMPLEX_MIN_SCORE = 3.0;
110+
111+
const multiTurnDetailsScenarios: Array<{
112+
scenario: IConversationScenario;
113+
minScore: number;
114+
}> = [
115+
{
116+
scenario: {
117+
name: 'Search then ask for multiple products',
118+
locale: 'en',
119+
turns: [
120+
{ userMessage: 'Show me smartphones' },
121+
{ userMessage: 'Tell me more about the first one' },
122+
{ userMessage: 'What about the second one?' },
123+
],
124+
expectedBehavior:
125+
'The assistant should show smartphones first, then provide details for the first smartphone, then provide details for the second smartphone. Each product should have specifications.',
126+
},
127+
minScore: MULTI_TURN_COMPLEX_MIN_SCORE,
128+
},
129+
{
130+
scenario: {
131+
name: 'Search then compare',
132+
locale: 'en',
133+
turns: [
134+
{ userMessage: 'I need a laptop' },
135+
{ userMessage: 'How much RAM does the first one have?' },
136+
],
137+
expectedBehavior:
138+
'The assistant should first show laptops, then provide the RAM specification for the first laptop when asked.',
139+
},
140+
minScore: MINIMUM_PASSING_SCORE,
141+
},
142+
];
143+
144+
describe('Multi-Turn Product Details E2E Evaluation', () => {
145+
describe.each(multiTurnDetailsScenarios)('Scenario: $scenario.name', ({ scenario, minScore }) => {
146+
let evaluationResult: IEvaluationResult;
147+
let conversation: IConversationTurn[];
148+
149+
beforeAll(async () => {
150+
const conversationResult = await runConversation(scenario);
151+
conversation = conversationResult.conversation;
152+
153+
console.log(`\n=== Multi-Turn: ${scenario.name} ===`);
154+
conversation.forEach((turn) => {
155+
console.log(`${turn.role.toUpperCase()}: ${turn.content}`);
156+
});
157+
158+
expect(conversationResult.success).toBe(true);
159+
160+
evaluationResult = await evaluateConversation(
161+
conversation,
162+
defaultProductDetailsCriteria,
163+
scenario.expectedBehavior
164+
);
165+
166+
console.log(`\nEvaluation Score: ${evaluationResult.score}`);
167+
console.log(`Reasoning: ${evaluationResult.reasoning}\n`);
168+
169+
if (evaluationResult.score < minScore) {
170+
saveFailedTest(scenario, conversation, evaluationResult);
171+
}
172+
}, 240000);
173+
174+
it(`should pass LLM evaluation with score >= ${minScore}`, () => {
175+
expect(evaluationResult.score).toBeGreaterThanOrEqual(minScore);
176+
});
177+
178+
it('should have valid reasoning', () => {
179+
expect(evaluationResult.reasoning).toBeTruthy();
180+
expect(evaluationResult.reasoning.length).toBeGreaterThan(10);
181+
});
182+
});
183+
});

agents/__tests__/evaluation/testFixtures.ts

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,12 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
1313
stock: 15,
1414
category: 'Laptops',
1515
isActive: true,
16+
attributes: [
17+
{ name: 'RAM', value: '32', unit: 'GB' },
18+
{ name: 'GPU', value: 'RTX 4080' },
19+
{ name: 'Storage', value: '1', unit: 'TB SSD' },
20+
{ name: 'Processor', value: 'Intel Core i9-13900HX' },
21+
],
1622
},
1723
{
1824
name: 'Business Laptop Elite',
@@ -22,6 +28,12 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
2228
stock: 25,
2329
category: 'Laptops',
2430
isActive: true,
31+
attributes: [
32+
{ name: 'RAM', value: '16', unit: 'GB' },
33+
{ name: 'Processor', value: 'Intel Core i7-1365U' },
34+
{ name: 'Storage', value: '512', unit: 'GB SSD' },
35+
{ name: 'Weight', value: '1.3', unit: 'kg' },
36+
],
2537
},
2638
{
2739
name: 'Budget Laptop Basic',
@@ -31,6 +43,11 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
3143
stock: 50,
3244
category: 'Laptops',
3345
isActive: true,
46+
attributes: [
47+
{ name: 'RAM', value: '8', unit: 'GB' },
48+
{ name: 'Processor', value: 'Intel Core i5-1235U' },
49+
{ name: 'Storage', value: '256', unit: 'GB SSD' },
50+
],
3451
},
3552
{
3653
name: 'Samsung Galaxy S24 Ultra',
@@ -40,6 +57,12 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
4057
stock: 30,
4158
category: 'Smartphones',
4259
isActive: true,
60+
attributes: [
61+
{ name: 'RAM', value: '12', unit: 'GB' },
62+
{ name: 'Storage', value: '512', unit: 'GB' },
63+
{ name: 'Camera', value: '200', unit: 'MP' },
64+
{ name: 'Display', value: '6.8', unit: 'inch' },
65+
],
4366
},
4467
{
4568
name: 'iPhone 15 Pro Max',
@@ -49,6 +72,12 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
4972
stock: 20,
5073
category: 'Smartphones',
5174
isActive: true,
75+
attributes: [
76+
{ name: 'Processor', value: 'A17 Pro' },
77+
{ name: 'Storage', value: '256', unit: 'GB' },
78+
{ name: 'Display', value: '6.7', unit: 'inch' },
79+
{ name: 'Material', value: 'Titanium' },
80+
],
5281
},
5382
{
5483
name: 'Xiaomi 14 Pro',
@@ -58,6 +87,11 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
5887
stock: 40,
5988
category: 'Smartphones',
6089
isActive: true,
90+
attributes: [
91+
{ name: 'Processor', value: 'Snapdragon 8 Gen 3' },
92+
{ name: 'Storage', value: '256', unit: 'GB' },
93+
{ name: 'Camera', value: 'Leica' },
94+
],
6195
},
6296
{
6397
name: 'Mechanical Gaming Keyboard RGB',
@@ -67,6 +101,11 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
67101
stock: 100,
68102
category: 'Gaming Peripherals',
69103
isActive: true,
104+
attributes: [
105+
{ name: 'Switch Type', value: 'Cherry MX' },
106+
{ name: 'Backlight', value: 'RGB' },
107+
{ name: 'Keys', value: '104' },
108+
],
70109
},
71110
{
72111
name: 'Gaming Mouse Pro',
@@ -76,6 +115,11 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
76115
stock: 80,
77116
category: 'Gaming Peripherals',
78117
isActive: true,
118+
attributes: [
119+
{ name: 'DPI', value: '25000' },
120+
{ name: 'Buttons', value: '8' },
121+
{ name: 'Lighting', value: 'RGB' },
122+
],
79123
},
80124
{
81125
name: 'Sony WH-1000XM5 Headphones',
@@ -85,6 +129,11 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
85129
stock: 35,
86130
category: 'Audio',
87131
isActive: true,
132+
attributes: [
133+
{ name: 'Battery Life', value: '30', unit: 'hours' },
134+
{ name: 'Noise Cancellation', value: 'Active' },
135+
{ name: 'Connection', value: 'Wireless Bluetooth' },
136+
],
88137
},
89138
{
90139
name: 'AirPods Pro 2',
@@ -94,6 +143,11 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
94143
stock: 45,
95144
category: 'Audio',
96145
isActive: true,
146+
attributes: [
147+
{ name: 'Noise Cancellation', value: 'Active' },
148+
{ name: 'Audio', value: 'Spatial Audio' },
149+
{ name: 'Type', value: 'Wireless Earbuds' },
150+
],
97151
},
98152
];
99153

0 commit comments

Comments
 (0)