|
| 1 | +//go:build !integration |
| 2 | + |
| 3 | +package cli |
| 4 | + |
| 5 | +import ( |
| 6 | + "testing" |
| 7 | + "time" |
| 8 | + |
| 9 | + "github.com/stretchr/testify/assert" |
| 10 | + "github.com/stretchr/testify/require" |
| 11 | +) |
| 12 | + |
| 13 | +func TestBuildEpisodeDataIncludesToolCalls(t *testing.T) { |
| 14 | + runs := []RunData{ |
| 15 | + { |
| 16 | + DatabaseID: 101, |
| 17 | + WorkflowName: "my-workflow", |
| 18 | + Status: "completed", |
| 19 | + Conclusion: "success", |
| 20 | + TokenUsage: 1000, |
| 21 | + CreatedAt: time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC), |
| 22 | + }, |
| 23 | + } |
| 24 | + processedRuns := []ProcessedRun{ |
| 25 | + { |
| 26 | + Run: WorkflowRun{ |
| 27 | + DatabaseID: 101, |
| 28 | + WorkflowName: "my-workflow", |
| 29 | + }, |
| 30 | + MCPToolUsage: &MCPToolUsageData{ |
| 31 | + ToolCalls: []MCPToolCall{ |
| 32 | + { |
| 33 | + ServerName: "github", |
| 34 | + ToolName: "get_file_contents", |
| 35 | + InputSize: 400, |
| 36 | + OutputSize: 9200, |
| 37 | + Duration: "350ms", |
| 38 | + Status: "success", |
| 39 | + }, |
| 40 | + { |
| 41 | + ServerName: "github", |
| 42 | + ToolName: "create_pull_request", |
| 43 | + InputSize: 200, |
| 44 | + OutputSize: 3000, |
| 45 | + Duration: "600ms", |
| 46 | + Status: "error", |
| 47 | + Error: "403 Resource not accessible by integration", |
| 48 | + }, |
| 49 | + }, |
| 50 | + }, |
| 51 | + }, |
| 52 | + } |
| 53 | + |
| 54 | + episodes, _ := buildEpisodeData(runs, processedRuns) |
| 55 | + require.Len(t, episodes, 1, "expected one episode") |
| 56 | + |
| 57 | + ep := episodes[0] |
| 58 | + require.Len(t, ep.ToolCalls, 2, "expected two tool calls in episode") |
| 59 | + |
| 60 | + // Tool calls are sorted by server, then tool name. With server="github": |
| 61 | + // "create_pull_request" < "get_file_contents" alphabetically. |
| 62 | + |
| 63 | + // First (alphabetically): create_pull_request — error call |
| 64 | + tc0 := ep.ToolCalls[0] |
| 65 | + assert.Equal(t, "create_pull_request", tc0.Tool, "tool name should match") |
| 66 | + assert.Equal(t, "github", tc0.Server, "server name should match") |
| 67 | + assert.Equal(t, (200+3000)/CharsPerToken, tc0.Tokens, "tokens should be estimated from sizes") |
| 68 | + assert.Equal(t, int64(600), tc0.DurationMS, "duration_ms should be 600") |
| 69 | + assert.Equal(t, "error", tc0.Status, "status should match") |
| 70 | + assert.Equal(t, "403 Resource not accessible by integration", tc0.Error, "error message should match") |
| 71 | + |
| 72 | + // Second (alphabetically): get_file_contents — success call |
| 73 | + tc1 := ep.ToolCalls[1] |
| 74 | + assert.Equal(t, "get_file_contents", tc1.Tool, "tool name should match") |
| 75 | + assert.Equal(t, "github", tc1.Server, "server name should match") |
| 76 | + assert.Equal(t, (400+9200)/CharsPerToken, tc1.Tokens, "tokens should be estimated from sizes") |
| 77 | + assert.Equal(t, int64(350), tc1.DurationMS, "duration_ms should be 350") |
| 78 | + assert.Equal(t, "success", tc1.Status, "status should match") |
| 79 | + assert.Empty(t, tc1.Error, "no error expected") |
| 80 | +} |
| 81 | + |
| 82 | +func TestBuildEpisodeDataNoToolCallsWhenMCPUsageAbsent(t *testing.T) { |
| 83 | + runs := []RunData{ |
| 84 | + { |
| 85 | + DatabaseID: 200, |
| 86 | + WorkflowName: "no-mcp-workflow", |
| 87 | + Status: "completed", |
| 88 | + CreatedAt: time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC), |
| 89 | + }, |
| 90 | + } |
| 91 | + processedRuns := []ProcessedRun{ |
| 92 | + { |
| 93 | + Run: WorkflowRun{ |
| 94 | + DatabaseID: 200, |
| 95 | + WorkflowName: "no-mcp-workflow", |
| 96 | + }, |
| 97 | + MCPToolUsage: nil, // no MCP tool usage |
| 98 | + }, |
| 99 | + } |
| 100 | + |
| 101 | + episodes, _ := buildEpisodeData(runs, processedRuns) |
| 102 | + require.Len(t, episodes, 1, "expected one episode") |
| 103 | + |
| 104 | + ep := episodes[0] |
| 105 | + assert.Empty(t, ep.ToolCalls, "tool_calls should be absent when no MCP usage data") |
| 106 | +} |
| 107 | + |
| 108 | +func TestBuildEpisodeDataAggregatesToolCallsAcrossRuns(t *testing.T) { |
| 109 | + // Two runs belonging to the same episode (via dispatch) |
| 110 | + workflowCallID := "dispatch:wc-42" |
| 111 | + runs := []RunData{ |
| 112 | + { |
| 113 | + DatabaseID: 301, |
| 114 | + WorkflowName: "orchestrator", |
| 115 | + Status: "completed", |
| 116 | + CreatedAt: time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC), |
| 117 | + AwContext: &AwContext{ |
| 118 | + WorkflowCallID: "wc-42", |
| 119 | + }, |
| 120 | + }, |
| 121 | + { |
| 122 | + DatabaseID: 302, |
| 123 | + WorkflowName: "worker", |
| 124 | + Status: "completed", |
| 125 | + CreatedAt: time.Date(2024, 1, 1, 12, 1, 0, 0, time.UTC), |
| 126 | + AwContext: &AwContext{ |
| 127 | + WorkflowCallID: "wc-42", |
| 128 | + }, |
| 129 | + }, |
| 130 | + } |
| 131 | + processedRuns := []ProcessedRun{ |
| 132 | + { |
| 133 | + Run: WorkflowRun{DatabaseID: 301, WorkflowName: "orchestrator"}, |
| 134 | + MCPToolUsage: &MCPToolUsageData{ |
| 135 | + ToolCalls: []MCPToolCall{ |
| 136 | + { |
| 137 | + ServerName: "github", |
| 138 | + ToolName: "search_code", |
| 139 | + InputSize: 100, |
| 140 | + OutputSize: 500, |
| 141 | + Duration: "200ms", |
| 142 | + Status: "success", |
| 143 | + }, |
| 144 | + }, |
| 145 | + }, |
| 146 | + }, |
| 147 | + { |
| 148 | + Run: WorkflowRun{DatabaseID: 302, WorkflowName: "worker"}, |
| 149 | + MCPToolUsage: &MCPToolUsageData{ |
| 150 | + ToolCalls: []MCPToolCall{ |
| 151 | + { |
| 152 | + ServerName: "github", |
| 153 | + ToolName: "create_issue", |
| 154 | + InputSize: 50, |
| 155 | + OutputSize: 200, |
| 156 | + Duration: "400ms", |
| 157 | + Status: "success", |
| 158 | + }, |
| 159 | + }, |
| 160 | + }, |
| 161 | + }, |
| 162 | + } |
| 163 | + |
| 164 | + episodes, _ := buildEpisodeData(runs, processedRuns) |
| 165 | + require.Len(t, episodes, 1, "expected one merged episode from two dispatch runs") |
| 166 | + |
| 167 | + ep := episodes[0] |
| 168 | + assert.Equal(t, workflowCallID, ep.EpisodeID, "episode id should reflect dispatch call id") |
| 169 | + assert.Len(t, ep.ToolCalls, 2, "tool_calls should include calls from both runs") |
| 170 | +} |
| 171 | + |
| 172 | +func TestMCPToolCallToEpisodeToolCall(t *testing.T) { |
| 173 | + tests := []struct { |
| 174 | + name string |
| 175 | + input MCPToolCall |
| 176 | + expectedTool string |
| 177 | + expectedServer string |
| 178 | + expectedTokens int |
| 179 | + expectedDurMS int64 |
| 180 | + expectedStatus string |
| 181 | + expectedError string |
| 182 | + }{ |
| 183 | + { |
| 184 | + name: "success call with duration", |
| 185 | + input: MCPToolCall{ |
| 186 | + ServerName: "github", |
| 187 | + ToolName: "list_issues", |
| 188 | + InputSize: 400, |
| 189 | + OutputSize: 1200, |
| 190 | + Duration: "250ms", |
| 191 | + Status: "success", |
| 192 | + }, |
| 193 | + expectedTool: "list_issues", |
| 194 | + expectedServer: "github", |
| 195 | + expectedTokens: (400 + 1200) / CharsPerToken, |
| 196 | + expectedDurMS: 250, |
| 197 | + expectedStatus: "success", |
| 198 | + }, |
| 199 | + { |
| 200 | + name: "error call with error message", |
| 201 | + input: MCPToolCall{ |
| 202 | + ServerName: "playwright", |
| 203 | + ToolName: "navigate", |
| 204 | + InputSize: 100, |
| 205 | + OutputSize: 0, |
| 206 | + Duration: "1s", |
| 207 | + Status: "error", |
| 208 | + Error: "timeout", |
| 209 | + }, |
| 210 | + expectedTool: "navigate", |
| 211 | + expectedServer: "playwright", |
| 212 | + expectedTokens: 100 / CharsPerToken, |
| 213 | + expectedDurMS: 1000, |
| 214 | + expectedStatus: "error", |
| 215 | + expectedError: "timeout", |
| 216 | + }, |
| 217 | + { |
| 218 | + name: "call without duration", |
| 219 | + input: MCPToolCall{ |
| 220 | + ServerName: "github", |
| 221 | + ToolName: "get_repo", |
| 222 | + InputSize: 200, |
| 223 | + OutputSize: 800, |
| 224 | + Duration: "", |
| 225 | + Status: "success", |
| 226 | + }, |
| 227 | + expectedTool: "get_repo", |
| 228 | + expectedServer: "github", |
| 229 | + expectedTokens: (200 + 800) / CharsPerToken, |
| 230 | + expectedDurMS: 0, |
| 231 | + expectedStatus: "success", |
| 232 | + }, |
| 233 | + } |
| 234 | + |
| 235 | + for _, tt := range tests { |
| 236 | + t.Run(tt.name, func(t *testing.T) { |
| 237 | + got := mcpToolCallToEpisodeToolCall(tt.input) |
| 238 | + assert.Equal(t, tt.expectedTool, got.Tool, "Tool should match") |
| 239 | + assert.Equal(t, tt.expectedServer, got.Server, "Server should match") |
| 240 | + assert.Equal(t, tt.expectedTokens, got.Tokens, "Tokens should be estimated from sizes") |
| 241 | + assert.Equal(t, tt.expectedDurMS, got.DurationMS, "DurationMS should match") |
| 242 | + assert.Equal(t, tt.expectedStatus, got.Status, "Status should match") |
| 243 | + assert.Equal(t, tt.expectedError, got.Error, "Error should match") |
| 244 | + }) |
| 245 | + } |
| 246 | +} |
0 commit comments