diff --git a/src/dashboard/components/TestDashboard.tsx b/src/dashboard/components/TestDashboard.tsx index abba66c..2b15e95 100644 --- a/src/dashboard/components/TestDashboard.tsx +++ b/src/dashboard/components/TestDashboard.tsx @@ -23,6 +23,8 @@ import { TestSummary } from "./TestSummary"; import { TestFilters } from "./TestFilters"; import { TestCard } from "./TestCard"; +declare const pendo: any; + interface TestResult { description: string; status: "pending" | "running" | "completed" | "failed"; @@ -187,6 +189,17 @@ export const TestDashboard: React.FC = () => { setStartTime(Date.now()); setEndTime(undefined); + const runStartTime = Date.now(); + + if (typeof pendo !== "undefined") { + pendo.track("test_suite_executed", { + provider, + model, + totalTests: availableTests.length, + batchSize: 3, + }); + } + try { const response = await fetch("/api/run-tests", { method: "POST", @@ -281,7 +294,18 @@ export const TestDashboard: React.FC = () => { setModel(e.target.value)} + onChange={(e) => { + const newModel = e.target.value; + if (typeof pendo !== "undefined") { + pendo.track("model_configuration_changed", { + provider, + model: newModel, + previousProvider: provider, + previousModel: model, + }); + } + setModel(newModel); + }} sx={{ bgcolor: "background.paper" }} > {Object.values(models).map((m) => ( diff --git a/src/dashboard/components/TestFilters.tsx b/src/dashboard/components/TestFilters.tsx index ac6d841..2e054a6 100644 --- a/src/dashboard/components/TestFilters.tsx +++ b/src/dashboard/components/TestFilters.tsx @@ -17,6 +17,8 @@ import { Speed as SpeedIcon, } from "@mui/icons-material"; +declare const pendo: any; + interface TestFiltersProps { categories: string[]; difficulties: string[]; @@ -50,7 +52,17 @@ export const TestFilters: React.FC = ({ fullWidth label="Search Tests" value={searchQuery} - onChange={(e) => onSearchChange(e.target.value)} + onChange={(e) => { + const value = e.target.value; + if (typeof pendo !== "undefined") { + 
pendo.track("test_filter_applied", { + searchQuery: value, + selectedCategory, + selectedDifficulty, + }); + } + onSearchChange(value); + }} placeholder="Search by test description..." size="small" /> @@ -59,7 +71,17 @@ export const TestFilters: React.FC = ({ Category onDifficultyChange(e.target.value)} + onChange={(e) => { + const value = e.target.value; + if (typeof pendo !== "undefined") { + pendo.track("test_filter_applied", { + searchQuery, + selectedCategory, + selectedDifficulty: value, + }); + } + onDifficultyChange(value); + }} label="Difficulty" > All Difficulties diff --git a/src/evaluator/core.ts b/src/evaluator/core.ts index 384b47e..45bd1f3 100644 --- a/src/evaluator/core.ts +++ b/src/evaluator/core.ts @@ -6,6 +6,7 @@ import { EvaluationConfig, EvaluationMetric } from '../types'; +import { pendoTrack } from '../utils/pendoTrack'; interface TestFunction { (): Promise; @@ -61,11 +62,41 @@ export class LLMEvaluator { }) ); + const metricProps: Record = { + testDescription: test.description, + provider: this.config.model.provider, + model: this.config.model.name, + }; + for (const mr of metricResults) { + const key = mr.metric.replace(/[\s-]/g, ''); + metricProps[`${key}Score`] = Math.round(mr.score * 100) / 100; + } + const overallScore = metricResults.length > 0 + ? metricResults.reduce((s, m) => s + m.score, 0) / metricResults.length + : 0; + metricProps.overallScore = Math.round(overallScore * 100) / 100; + pendoTrack({ + event: 'metric_evaluation_completed', + properties: metricProps, + }); + // Run rule validations if configured - const ruleViolations = this.config.rules + const ruleViolations = this.config.rules ? 
await this.validateRules(actualOutput) : []; + if (this.config.rules && this.config.rules.length > 0) { + pendoTrack({ + event: 'rule_validation_completed', + properties: { + testDescription: test.description, + ruleCount: this.config.rules.length, + violationCount: ruleViolations.length, + violatedRules: ruleViolations.join(',').substring(0, 200), + }, + }); + } + // Get LLM evaluation const evaluation = await this.evaluateResponse( test.description, @@ -73,6 +104,18 @@ export class LLMEvaluator { actualOutput ); + pendoTrack({ + event: 'llm_evaluation_completed', + properties: { + testDescription: test.description, + evaluatorModel: this.config.evaluator.model.name, + evaluatorProvider: this.config.evaluator.model.provider, + score: evaluation.score, + feedbackLength: evaluation.feedback?.length || 0, + reasoningLength: evaluation.reasoning?.length || 0, + }, + }); + // Get overall summary const summary = await this.summarizeEvaluation(metricResults); diff --git a/src/server/test-server.ts b/src/server/test-server.ts index a9d925f..f59c946 100644 --- a/src/server/test-server.ts +++ b/src/server/test-server.ts @@ -10,6 +10,7 @@ import { register } from 'ts-node'; import { initializeEmbeddings } from '../utils/embeddings'; import cors from 'cors'; import { setGlobalSettings } from '../test-utils/setup'; +import { pendoTrack } from '../utils/pendoTrack'; interface RunTestsRequest { provider: Providers; @@ -73,6 +74,20 @@ export function setupTestRoutes(app: Express, io: Server) { const tests = globalTestStore.getAllTests(); console.log(`Successfully loaded ${tests.length} tests from ${testFiles.length} files`); + + const categories = [...new Set(tests.map((t: any) => t.metadata?.category).filter(Boolean))]; + const difficulties = [...new Set(tests.map((t: any) => t.metadata?.difficulty).filter(Boolean))]; + pendoTrack({ + event: 'test_authoring_loaded', + properties: { + testFileCount: testFiles.length, + totalTestCount: tests.length, + categories: 
categories.join(','), + difficulties: difficulties.join(','), + pattern: '**/*.promptproof.test.{ts,js}', + }, + }); + return tests; } catch (error) { console.error('Error loading tests:', error); @@ -124,6 +139,18 @@ export function setupTestRoutes(app: Express, io: Server) { ...result }; + pendoTrack({ + event: 'individual_test_completed', + properties: { + testDescription: test.description, + status: 'completed', + score: finalResult.score, + durationMs: endTime - startTime, + category: test.metadata?.category || '', + difficulty: test.metadata?.difficulty || '', + }, + }); + // Emit individual test completion io.emit('testResult', finalResult); console.log(`Test completed: ${test.description}`); @@ -142,7 +169,17 @@ export function setupTestRoutes(app: Express, io: Server) { endTime: Date.now(), metadata: test.metadata }; - + + pendoTrack({ + event: 'individual_test_failed', + properties: { + testDescription: test.description, + errorMessage: error instanceof Error ? error.message : 'Unknown error', + category: test.metadata?.category || '', + difficulty: test.metadata?.difficulty || '', + }, + }); + // Emit test failure io.emit('testResult', failedResult); return failedResult; @@ -209,17 +246,50 @@ export function setupTestRoutes(app: Express, io: Server) { console.log(`Loaded ${tests.length} tests`); // Run tests in parallel with batch size of 3 + const suiteStartTime = Date.now(); const results = await runTestsInParallel(tests, evaluator, 3, io); - - res.json({ - success: true, + const suiteDuration = Date.now() - suiteStartTime; + + const passedCount = results.filter((r: any) => r.status === 'completed' && r.score >= 0.8).length; + const failedCount = results.length - passedCount; + const avgScore = results.length > 0 + ? 
results.reduce((sum: number, r: any) => sum + (r.score || 0), 0) / results.length + : 0; + + pendoTrack({ + event: 'test_suite_completed', + properties: { + provider, + model, + totalTests: results.length, + passedCount, + failedCount, + totalDurationMs: suiteDuration, + averageScore: Math.round(avgScore * 100) / 100, + batchSize: 3, + }, + }); + + res.json({ + success: true, message: `Completed ${results.length} tests`, - results + results }); } catch (error) { console.error('Error in test execution:', error); - res.status(500).json({ - success: false, + + pendoTrack({ + event: 'test_suite_failed', + properties: { + provider, + model, + errorMessage: error instanceof Error ? error.message : 'Unknown error', + errorType: error instanceof Error ? error.constructor.name : 'Unknown', + }, + }); + + res.status(500).json({ + success: false, error: error instanceof Error ? error.message : 'Unknown error' }); }
diff --git a/src/utils/pendoTrack.ts b/src/utils/pendoTrack.ts
new file mode 100644
index 0000000..4c9e79a
--- /dev/null
+++ b/src/utils/pendoTrack.ts
@@ -0,0 +1,62 @@
+/**
+ * Minimal client for posting track events to Pendo's data endpoint.
+ *
+ * Tracking is best-effort: failures are logged and never thrown to callers.
+ */
+
+// NOTE(review): the host and integration key are hard-coded. The key looks
+// like a credential and the host name suggests a dev environment; consider
+// loading both from configuration instead of committing them.
+const PENDO_DATA_HOST = 'https://data.pendo-dev.pendo-dev.com';
+const PENDO_INTEGRATION_KEY = '73f9153a-be31-4f52-8f34-c746813d333e';
+
+/** Input accepted by pendoTrack. */
+interface PendoTrackPayload {
+  /** Name of the track event. */
+  event: string;
+  /** Sent as 'system' when omitted or empty. */
+  visitorId?: string;
+  /** Sent as 'system' when omitted or empty. */
+  accountId?: string;
+  /** Sent as an empty object when omitted. */
+  properties?: Record<string, unknown>;
+}
+
+/**
+ * POSTs one track event to `${PENDO_DATA_HOST}/data/track`.
+ *
+ * Never rejects: network errors and non-2xx responses are logged with
+ * console.error, so callers may fire and forget.
+ *
+ * @param payload - Event name plus optional visitor, account and properties.
+ * @returns A promise that resolves once the request has settled.
+ */
+export async function pendoTrack(payload: PendoTrackPayload): Promise<void> {
+  try {
+    const response = await fetch(`${PENDO_DATA_HOST}/data/track`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'x-pendo-integration-key': PENDO_INTEGRATION_KEY,
+      },
+      body: JSON.stringify({
+        type: 'track',
+        event: payload.event,
+        // `||` rather than `??`: an empty string also falls back to 'system'.
+        visitorId: payload.visitorId || 'system',
+        accountId: payload.accountId || 'system',
+        timestamp: Date.now(),
+        properties: payload.properties || {},
+      }),
+    });
+    // fetch only rejects on network failure, so an HTTP error status has to
+    // be checked explicitly or rejected events would go unnoticed.
+    if (!response.ok) {
+      console.error(
+        `[Pendo] Failed to track event "${payload.event}": HTTP ${response.status}`,
+      );
+    }
+  } catch (error) {
+    console.error(`[Pendo] Failed to track event "${payload.event}":`, error);
+  }
+}