Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 37 additions & 2 deletions src/dashboard/components/TestDashboard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import { TestSummary } from "./TestSummary";
import { TestFilters } from "./TestFilters";
import { TestCard } from "./TestCard";

declare const pendo: any;

interface TestResult {
description: string;
status: "pending" | "running" | "completed" | "failed";
Expand Down Expand Up @@ -187,6 +189,17 @@ export const TestDashboard: React.FC = () => {
setStartTime(Date.now());
setEndTime(undefined);

const runStartTime = Date.now();

if (typeof pendo !== "undefined") {
pendo.track("test_suite_executed", {
provider,
model,
totalTests: availableTests.length,
batchSize: 3,
});
}

try {
const response = await fetch("/api/run-tests", {
method: "POST",
Expand Down Expand Up @@ -281,7 +294,18 @@ export const TestDashboard: React.FC = () => {
<Select
fullWidth
value={provider}
onChange={(e) => setProvider(e.target.value as Providers)}
onChange={(e) => {
const newProvider = e.target.value as Providers;
if (typeof pendo !== "undefined") {
pendo.track("model_configuration_changed", {
provider: newProvider,
model,
previousProvider: provider,
previousModel: model,
});
}
setProvider(newProvider);
}}
sx={{ bgcolor: "background.paper" }}
>
{Object.values(Providers).map((p) => (
Expand All @@ -298,7 +322,18 @@ export const TestDashboard: React.FC = () => {
<Select
fullWidth
value={model}
onChange={(e) => setModel(e.target.value)}
onChange={(e) => {
const newModel = e.target.value;
if (typeof pendo !== "undefined") {
pendo.track("model_configuration_changed", {
provider,
model: newModel,
previousProvider: provider,
previousModel: model,
});
}
setModel(newModel);
}}
sx={{ bgcolor: "background.paper" }}
>
{Object.values(models).map((m) => (
Expand Down
38 changes: 35 additions & 3 deletions src/dashboard/components/TestFilters.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import {
Speed as SpeedIcon,
} from "@mui/icons-material";

declare const pendo: any;

interface TestFiltersProps {
categories: string[];
difficulties: string[];
Expand Down Expand Up @@ -50,7 +52,17 @@ export const TestFilters: React.FC<TestFiltersProps> = ({
fullWidth
label="Search Tests"
value={searchQuery}
onChange={(e) => onSearchChange(e.target.value)}
onChange={(e) => {
const value = e.target.value;
if (typeof pendo !== "undefined") {
pendo.track("test_filter_applied", {
searchQuery: value,
selectedCategory,
selectedDifficulty,
});
}
onSearchChange(value);
}}
placeholder="Search by test description..."
size="small"
/>
Expand All @@ -59,7 +71,17 @@ export const TestFilters: React.FC<TestFiltersProps> = ({
<InputLabel>Category</InputLabel>
<Select
value={selectedCategory}
onChange={(e) => onCategoryChange(e.target.value)}
onChange={(e) => {
const value = e.target.value;
if (typeof pendo !== "undefined") {
pendo.track("test_filter_applied", {
searchQuery,
selectedCategory: value,
selectedDifficulty,
});
}
onCategoryChange(value);
}}
label="Category"
>
<MenuItem value="">All Categories</MenuItem>
Expand All @@ -78,7 +100,17 @@ export const TestFilters: React.FC<TestFiltersProps> = ({
<InputLabel>Difficulty</InputLabel>
<Select
value={selectedDifficulty}
onChange={(e) => onDifficultyChange(e.target.value)}
onChange={(e) => {
const value = e.target.value;
if (typeof pendo !== "undefined") {
pendo.track("test_filter_applied", {
searchQuery,
selectedCategory,
selectedDifficulty: value,
});
}
onDifficultyChange(value);
}}
label="Difficulty"
>
<MenuItem value="">All Difficulties</MenuItem>
Expand Down
45 changes: 44 additions & 1 deletion src/evaluator/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import {
EvaluationConfig,
EvaluationMetric
} from '../types';
import { pendoTrack } from '../utils/pendoTrack';

interface TestFunction {
(): Promise<string>;
Expand Down Expand Up @@ -61,18 +62,60 @@ export class LLMEvaluator {
})
);

const metricProps: Record<string, unknown> = {
testDescription: test.description,
provider: this.config.model.provider,
model: this.config.model.name,
};
for (const mr of metricResults) {
const key = mr.metric.replace(/[\s-]/g, '');
metricProps[`${key}Score`] = Math.round(mr.score * 100) / 100;
}
const overallScore = metricResults.length > 0
? metricResults.reduce((s, m) => s + m.score, 0) / metricResults.length
: 0;
metricProps.overallScore = Math.round(overallScore * 100) / 100;
pendoTrack({
event: 'metric_evaluation_completed',
properties: metricProps,
});

// Run rule validations if configured
const ruleViolations = this.config.rules
const ruleViolations = this.config.rules
? await this.validateRules(actualOutput)
: [];

if (this.config.rules && this.config.rules.length > 0) {
pendoTrack({
event: 'rule_validation_completed',
properties: {
testDescription: test.description,
ruleCount: this.config.rules.length,
violationCount: ruleViolations.length,
violatedRules: ruleViolations.join(',').substring(0, 200),
},
});
}

// Get LLM evaluation
const evaluation = await this.evaluateResponse(
test.description,
response,
actualOutput
);

pendoTrack({
event: 'llm_evaluation_completed',
properties: {
testDescription: test.description,
evaluatorModel: this.config.evaluator.model.name,
evaluatorProvider: this.config.evaluator.model.provider,
score: evaluation.score,
feedbackLength: evaluation.feedback?.length || 0,
reasoningLength: evaluation.reasoning?.length || 0,
},
});

// Get overall summary
const summary = await this.summarizeEvaluation(metricResults);

Expand Down
84 changes: 77 additions & 7 deletions src/server/test-server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { register } from 'ts-node';
import { initializeEmbeddings } from '../utils/embeddings';
import cors from 'cors';
import { setGlobalSettings } from '../test-utils/setup';
import { pendoTrack } from '../utils/pendoTrack';

interface RunTestsRequest {
provider: Providers;
Expand Down Expand Up @@ -73,6 +74,20 @@ export function setupTestRoutes(app: Express, io: Server) {

const tests = globalTestStore.getAllTests();
console.log(`Successfully loaded ${tests.length} tests from ${testFiles.length} files`);

const categories = [...new Set(tests.map((t: any) => t.metadata?.category).filter(Boolean))];
const difficulties = [...new Set(tests.map((t: any) => t.metadata?.difficulty).filter(Boolean))];
pendoTrack({
event: 'test_authoring_loaded',
properties: {
testFileCount: testFiles.length,
totalTestCount: tests.length,
categories: categories.join(','),
difficulties: difficulties.join(','),
pattern: '**/*.promptproof.test.{ts,js}',
},
});

return tests;
} catch (error) {
console.error('Error loading tests:', error);
Expand Down Expand Up @@ -124,6 +139,18 @@ export function setupTestRoutes(app: Express, io: Server) {
...result
};

pendoTrack({
event: 'individual_test_completed',
properties: {
testDescription: test.description,
status: 'completed',
score: finalResult.score,
durationMs: endTime - startTime,
category: test.metadata?.category || '',
difficulty: test.metadata?.difficulty || '',
},
});

// Emit individual test completion
io.emit('testResult', finalResult);
console.log(`Test completed: ${test.description}`);
Expand All @@ -142,7 +169,17 @@ export function setupTestRoutes(app: Express, io: Server) {
endTime: Date.now(),
metadata: test.metadata
};


pendoTrack({
event: 'individual_test_failed',
properties: {
testDescription: test.description,
errorMessage: error instanceof Error ? error.message : 'Unknown error',
category: test.metadata?.category || '',
difficulty: test.metadata?.difficulty || '',
},
});

// Emit test failure
io.emit('testResult', failedResult);
return failedResult;
Expand Down Expand Up @@ -209,17 +246,50 @@ export function setupTestRoutes(app: Express, io: Server) {
console.log(`Loaded ${tests.length} tests`);

// Run tests in parallel with batch size of 3
const suiteStartTime = Date.now();
const results = await runTestsInParallel(tests, evaluator, 3, io);

res.json({
success: true,
const suiteDuration = Date.now() - suiteStartTime;

const passedCount = results.filter((r: any) => r.status === 'completed' && r.score >= 0.8).length;
const failedCount = results.length - passedCount;
const avgScore = results.length > 0
? results.reduce((sum: number, r: any) => sum + (r.score || 0), 0) / results.length
: 0;

pendoTrack({
event: 'test_suite_completed',
properties: {
provider,
model,
totalTests: results.length,
passedCount,
failedCount,
totalDurationMs: suiteDuration,
averageScore: Math.round(avgScore * 100) / 100,
batchSize: 3,
},
});

res.json({
success: true,
message: `Completed ${results.length} tests`,
results
results
});
} catch (error) {
console.error('Error in test execution:', error);
res.status(500).json({
success: false,

pendoTrack({
event: 'test_suite_failed',
properties: {
provider,
model,
errorMessage: error instanceof Error ? error.message : 'Unknown error',
errorType: error instanceof Error ? error.constructor.name : 'Unknown',
},
});

res.status(500).json({
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
});
}
Expand Down
31 changes: 31 additions & 0 deletions src/utils/pendoTrack.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/** Base URL of the Pendo data endpoint that receives track events. */
const PENDO_DATA_HOST = 'https://data.pendo-dev.pendo-dev.com';
// NOTE(review): this integration key is committed in source. Consider loading
// it from configuration/environment instead so it can be rotated without a
// code change and does not leak through the repository history.
const PENDO_INTEGRATION_KEY = '73f9153a-be31-4f52-8f34-c746813d333e';

/** Input accepted by {@link pendoTrack}. */
interface PendoTrackPayload {
/** Name of the event to record. */
event: string;
/** Visitor to attribute the event to; falls back to `'system'` when omitted or empty. */
visitorId?: string;
/** Account to attribute the event to; falls back to `'system'` when omitted or empty. */
accountId?: string;
/** Arbitrary event properties; an empty object is sent when omitted. */
properties?: Record<string, unknown>;
}

/**
 * Sends a single track event to Pendo as a JSON POST to `/data/track`.
 *
 * This is best-effort telemetry: the returned promise always resolves and
 * never rejects. Transport errors, payload serialization errors and non-2xx
 * HTTP responses are all reported through `console.error` so that analytics
 * problems can never break the calling code path.
 *
 * @param payload - Event name plus optional visitor/account ids and properties.
 * @returns A promise that resolves once the request has completed or failed.
 */
export async function pendoTrack(payload: PendoTrackPayload): Promise<void> {
try {
const response = await fetch(`${PENDO_DATA_HOST}/data/track`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-pendo-integration-key': PENDO_INTEGRATION_KEY,
},
body: JSON.stringify({
type: 'track',
event: payload.event,
visitorId: payload.visitorId || 'system',
accountId: payload.accountId || 'system',
timestamp: Date.now(),
properties: payload.properties || {},
}),
});

// fetch() only rejects on network-level failures; an HTTP error status
// (bad key, malformed payload, server outage) still resolves. Surface it
// so rejected events are not lost silently.
if (!response.ok) {
console.error(
`[Pendo] Failed to track event "${payload.event}": HTTP ${response.status} ${response.statusText}`,
);
}
} catch (error) {
console.error(`[Pendo] Failed to track event "${payload.event}":`, error);
}
}