diff --git a/cmd/entire/cli/checkpoint/checkpoint.go b/cmd/entire/cli/checkpoint/checkpoint.go index 40133fbcf..555f0337c 100644 --- a/cmd/entire/cli/checkpoint/checkpoint.go +++ b/cmd/entire/cli/checkpoint/checkpoint.go @@ -108,6 +108,11 @@ type Store interface { // session transcript (prompt to stop event). // Returns ErrCheckpointNotFound if the checkpoint doesn't exist. UpdateCommitted(ctx context.Context, opts UpdateCommittedOptions) error + + // UpdateCheckpointSummary patches the root CheckpointSummary for an existing checkpoint. + // Used after all sessions are condensed to store combined multi-session attribution. + // Returns ErrCheckpointNotFound if the checkpoint does not exist. + UpdateCheckpointSummary(ctx context.Context, opts UpdateCheckpointSummaryOptions) error } // WriteTemporaryResult contains the result of writing a temporary checkpoint. @@ -312,6 +317,15 @@ type UpdateCommittedOptions struct { Agent types.AgentType } +// UpdateCheckpointSummaryOptions holds options for patching the root CheckpointSummary. +type UpdateCheckpointSummaryOptions struct { + // CheckpointID identifies the checkpoint whose root metadata.json to update. + CheckpointID id.CheckpointID + + // CombinedAttribution is the merged attribution computed across all sessions. + CombinedAttribution *InitialAttribution +} + // CommittedInfo contains summary information about a committed checkpoint. type CommittedInfo struct { // CheckpointID is the stable 12-hex-char identifier @@ -451,6 +465,10 @@ type CheckpointSummary struct { FilesTouched []string `json:"files_touched"` Sessions []SessionFilePaths `json:"sessions"` TokenUsage *agent.TokenUsage `json:"token_usage,omitempty"` + // CombinedAttribution is computed once across all sessions sharing a shadow branch. + // Only present for multi-session checkpoints written by CLI v0.x+. + // When present, prefer this over per-session InitialAttribution in aggregation. + CombinedAttribution *InitialAttribution `json:"combined_attribution,omitempty"` } // SessionMetrics contains hook-provided session metrics from agents that report diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index 47a75beb8..f1ba25462 100644 --- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -1102,6 +1102,82 @@ func (s *GitStore) UpdateSummary(ctx context.Context, checkpointID id.Checkpoint return nil } +// UpdateCheckpointSummary patches the root-level metadata.json (CheckpointSummary) +// for an existing checkpoint, setting CombinedAttribution. +// Returns ErrCheckpointNotFound if the checkpoint does not exist. +func (s *GitStore) UpdateCheckpointSummary(ctx context.Context, opts UpdateCheckpointSummaryOptions) error { + if opts.CombinedAttribution == nil { + return nil // No-op: nothing to patch + } + + if err := ctx.Err(); err != nil { + return err //nolint:wrapcheck // Propagating context cancellation + } + + if err := s.ensureSessionsBranch(); err != nil { + return fmt.Errorf("failed to ensure sessions branch: %w", err) + } + + parentHash, rootTreeHash, err := s.getSessionsBranchRef() + if err != nil { + return err + } + + basePath := opts.CheckpointID.Path() + "/" + checkpointPath := opts.CheckpointID.Path() + entries, err := s.flattenCheckpointEntries(rootTreeHash, checkpointPath) + if err != nil { + return err + } + + rootMetadataPath := basePath + paths.MetadataFileName + entry, exists := entries[rootMetadataPath] + if !exists { + return ErrCheckpointNotFound + } + + summary, err := s.readSummaryFromBlob(entry.Hash) + if err != nil { + return fmt.Errorf("failed to read checkpoint summary: %w", err) + } + + summary.CombinedAttribution = opts.CombinedAttribution + + summaryJSON, err := jsonutil.MarshalIndentWithNewline(summary, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal summary: %w", err) + } + summaryHash, err := CreateBlobFromContent(s.repo, summaryJSON) + if err != nil { + return fmt.Errorf("failed to create summary blob: %w", err) + } + entries[rootMetadataPath] = object.TreeEntry{ + Name: rootMetadataPath, + Mode: filemode.Regular, + Hash: summaryHash, + } + + newTreeHash, err := s.spliceCheckpointSubtree(rootTreeHash, opts.CheckpointID, basePath, entries) + if err != nil { + return err + } + + authorName, authorEmail := GetGitAuthorFromRepo(s.repo) + commitMsg := fmt.Sprintf("Checkpoint: %s\n\nEntire-Combined-Attribution: true", opts.CheckpointID) + newCommitHash, err := s.createCommit(newTreeHash, parentHash, commitMsg, authorName, authorEmail) + if err != nil { + return err + } + + refName := plumbing.NewBranchReferenceName(paths.MetadataBranchName) + newRef := plumbing.NewHashReference(refName, newCommitHash) + if err := s.repo.Storer.SetReference(newRef); err != nil { + return fmt.Errorf("failed to set branch reference: %w", err) + } + + return nil +} + // UpdateCommitted replaces the transcript, prompts, and context for an existing // committed checkpoint. Uses replace semantics: the full session transcript is // written, replacing whatever was stored at initial condensation time. diff --git a/cmd/entire/cli/checkpoint/committed_update_test.go b/cmd/entire/cli/checkpoint/committed_update_test.go index cb193a428..17d05b208 100644 --- a/cmd/entire/cli/checkpoint/committed_update_test.go +++ b/cmd/entire/cli/checkpoint/committed_update_test.go @@ -580,5 +580,122 @@ func TestGetGitAuthorFromRepo_NoConfig(t *testing.T) { } } +func TestUpdateCheckpointSummary_SetsCombinedAttribution(t *testing.T) { + t.Parallel() + _, store, cpID := setupRepoForUpdate(t) + + combined := &InitialAttribution{ + AgentLines: 15, + HumanAdded: 3, + HumanModified: 1, + HumanRemoved: 0, + TotalCommitted: 18, + AgentPercentage: 83.3, + } + + err := store.UpdateCheckpointSummary(context.Background(), UpdateCheckpointSummaryOptions{ + CheckpointID: cpID, + CombinedAttribution: combined, + }) + if err != nil { + t.Fatalf("UpdateCheckpointSummary() error = %v", err) + } + + summary, err := store.ReadCommitted(context.Background(), cpID) + if err != nil { + t.Fatalf("ReadCommitted() error = %v", err) + } + + if summary.CombinedAttribution == nil { + t.Fatal("CombinedAttribution is nil after update") + } + if summary.CombinedAttribution.AgentLines != 15 { + t.Errorf("AgentLines = %d, want 15", summary.CombinedAttribution.AgentLines) + } + if summary.CombinedAttribution.HumanAdded != 3 { + t.Errorf("HumanAdded = %d, want 3", summary.CombinedAttribution.HumanAdded) + } + if summary.CombinedAttribution.AgentPercentage != 83.3 { + t.Errorf("AgentPercentage = %.1f, want 83.3", summary.CombinedAttribution.AgentPercentage) + } +} + +func TestUpdateCheckpointSummary_NonexistentCheckpoint(t *testing.T) { + t.Parallel() + _, store, _ := setupRepoForUpdate(t) + + err := store.UpdateCheckpointSummary(context.Background(), UpdateCheckpointSummaryOptions{ + CheckpointID: id.MustCheckpointID("deadbeef1234"), + CombinedAttribution: &InitialAttribution{ + AgentLines: 10, + }, + }) + if err == nil { + t.Fatal("expected error for nonexistent checkpoint, got nil") + } +} + +func TestUpdateCheckpointSummary_PreservesExistingFields(t *testing.T) { + t.Parallel() + _, store, cpID := setupRepoForUpdate(t) + + summaryBefore, err := store.ReadCommitted(context.Background(), cpID) + if err != nil { + t.Fatalf("ReadCommitted() before error = %v", err) + } + + err = store.UpdateCheckpointSummary(context.Background(), UpdateCheckpointSummaryOptions{ + CheckpointID: cpID, + CombinedAttribution: &InitialAttribution{ + AgentLines: 10, + TotalCommitted: 10, + AgentPercentage: 100.0, + }, + }) + if err != nil { + t.Fatalf("UpdateCheckpointSummary() error = %v", err) + } + + summaryAfter, err := store.ReadCommitted(context.Background(), cpID) + if err != nil { + t.Fatalf("ReadCommitted() after error = %v", err) + } + + if summaryAfter.CheckpointID != summaryBefore.CheckpointID { + t.Errorf("CheckpointID changed: %q -> %q", summaryBefore.CheckpointID, summaryAfter.CheckpointID) + } + if summaryAfter.Strategy != summaryBefore.Strategy { + t.Errorf("Strategy changed: %q -> %q", summaryBefore.Strategy, summaryAfter.Strategy) + } + if len(summaryAfter.Sessions) != len(summaryBefore.Sessions) { + t.Errorf("Sessions count changed: %d -> %d", len(summaryBefore.Sessions), len(summaryAfter.Sessions)) + } + if summaryAfter.CombinedAttribution == nil { + t.Fatal("CombinedAttribution should be set after update") + } +} + +func TestUpdateCheckpointSummary_NilCombinedAttributionIsNoop(t *testing.T) { + t.Parallel() + _, store, cpID := setupRepoForUpdate(t) + + // Nil CombinedAttribution should be a no-op (no error, no unnecessary commit) + err := store.UpdateCheckpointSummary(context.Background(), UpdateCheckpointSummaryOptions{ + CheckpointID: cpID, + CombinedAttribution: nil, + }) + if err != nil { + t.Fatalf("UpdateCheckpointSummary(nil) error = %v", err) + } + + summary, err := store.ReadCommitted(context.Background(), cpID) + if err != nil { + t.Fatalf("ReadCommitted() error = %v", err) + } + if summary.CombinedAttribution != nil { + t.Errorf("CombinedAttribution should remain nil, got %+v", summary.CombinedAttribution) + } +} + // Verify go-git config import is used (compile-time check). var _ = config.GlobalScope diff --git a/cmd/entire/cli/integration_test/attribution_test.go b/cmd/entire/cli/integration_test/attribution_test.go index cc470418b..6d9206098 100644 --- a/cmd/entire/cli/integration_test/attribution_test.go +++ b/cmd/entire/cli/integration_test/attribution_test.go @@ -4,6 +4,7 @@ package integration import ( "encoding/json" + "strings" "testing" "github.com/entireio/cli/cmd/entire/cli/checkpoint" @@ -516,6 +517,77 @@ func TestManualCommit_AttributionNoDoubleCount(t *testing.T) { } } +func TestManualCommit_Attribution_IntermediateCommit(t *testing.T) { + t.Parallel() + env := NewTestEnv(t) + defer env.Cleanup() + + env.InitRepo() + + env.WriteFile("existing.go", "package main\n") + env.GitAdd("existing.go") + env.GitCommit("Initial commit") + + env.InitEntire() + + session := env.NewSession() + if err := env.SimulateUserPromptSubmit(session.ID); err != nil { + t.Fatalf("SimulateUserPromptSubmit failed: %v", err) + } + + agentContent := "package main\n\n" + strings.Repeat("// agent line\n", 18) + env.WriteFile("agent.go", agentContent) + + session.CreateTranscript( + "Create agent.go with agent-authored lines", + []FileChange{{Path: "agent.go", Content: agentContent}}, + ) + if err := env.SimulateStop(session.ID, session.TranscriptPath); err != nil { + t.Fatalf("SimulateStop failed: %v", err) + } + + unrelatedContent := "package main\n\n" + strings.Repeat("// unrelated line\n", 48) + env.WriteFile("unrelated.go", unrelatedContent) + env.GitAdd("unrelated.go") + env.GitCommit("Add unrelated.go without hooks") + + env.GitCommitWithShadowHooks("Add agent work", "agent.go") + + repo, err := git.PlainOpen(env.RepoDir) + if err != nil { + t.Fatalf("failed to open repo: %v", err) + } + + headHash := env.GetHeadHash() + commitObj, err := repo.CommitObject(plumbing.NewHash(headHash)) + if err != nil { + t.Fatalf("failed to get commit object: %v", err) + } + + checkpointID, found := trailers.ParseCheckpoint(commitObj.Message) + if !found { + t.Fatal("Commit should have Entire-Checkpoint trailer") + } + + attr := getAttributionFromMetadata(t, repo, checkpointID) + + // agent.go: 2-line header + 18 agent lines = 20 lines. + // unrelated.go must not appear in this commit's attribution — the fix scopes + // non-agent file enumeration to parent→HEAD only. + if attr.AgentLines != 20 { + t.Errorf("AgentLines = %d, want 20", attr.AgentLines) + } + if attr.HumanAdded != 0 { + t.Errorf("HumanAdded = %d, want 0 (unrelated.go committed separately)", attr.HumanAdded) + } + if attr.TotalCommitted != 20 { + t.Errorf("TotalCommitted = %d, want 20 (only agent.go in this commit)", attr.TotalCommitted) + } + if attr.AgentPercentage < 99.9 || attr.AgentPercentage > 100.1 { + t.Errorf("AgentPercentage = %.1f%%, want 100.0%%", attr.AgentPercentage) + } +} + // getAttributionFromMetadata reads attribution from a checkpoint on entire/checkpoints/v1 branch. // InitialAttribution is stored in session-level metadata (0/metadata.json). func getAttributionFromMetadata(t *testing.T, repo *git.Repository, checkpointID id.CheckpointID) *checkpoint.InitialAttribution { diff --git a/cmd/entire/cli/strategy/manual_commit_attribution.go b/cmd/entire/cli/strategy/manual_commit_attribution.go index 04c535bb1..6458ad900 100644 --- a/cmd/entire/cli/strategy/manual_commit_attribution.go +++ b/cmd/entire/cli/strategy/manual_commit_attribution.go @@ -179,8 +179,9 @@ func countLinesStr(content string) int { // 4. Estimate user self-modifications vs agent modifications using per-file tracking // 5. Compute percentages // -// attributionBaseCommit and headCommitHash are optional commit hashes for fast non-agent -// file detection via git diff-tree. When empty, falls back to go-git tree walk. +// attributionBaseCommit, headCommitHash, and parentCommitHash are optional commit hashes +// for fast non-agent file detection via git diff-tree. When empty, falls back to go-git +// tree walk using the provided trees. // // Note: Binary files (detected by null bytes) are silently excluded from attribution // calculations since line-based diffing only applies to text files. @@ -191,11 +192,13 @@ func CalculateAttributionWithAccumulated( baseTree *object.Tree, shadowTree *object.Tree, headTree *object.Tree, + parentTree *object.Tree, filesTouched []string, promptAttributions []PromptAttribution, repoDir string, attributionBaseCommit string, headCommitHash string, + parentCommitHash string, ) *checkpoint.InitialAttribution { if len(filesTouched) == 0 { return nil @@ -242,9 +245,29 @@ func CalculateAttributionWithAccumulated( } } - // Calculate total user edits to non-agent files (files not in filesTouched) - // These files are not in the shadow tree, so base→head captures ALL their user edits - allChangedFiles, err := getAllChangedFiles(ctx, baseTree, headTree, repoDir, attributionBaseCommit, headCommitHash) + // Calculate total user edits to non-agent files (files not in filesTouched). + // Scope this to the attributed commit only by diffing parent→head. If parent + // context is unavailable, fall back to the session base behavior. + // IMPORTANT: nonAgentBaseTree and nonAgentBaseCommit must reference the same + // commit — getAllChangedFiles has a fast path (CLI diff-tree using hashes) and + // a slow path (go-git tree walk using trees). If they diverge, attribution + // produces inconsistent results depending on which path fires. + nonAgentBaseTree := baseTree + nonAgentBaseCommit := attributionBaseCommit + if parentTree != nil { + nonAgentBaseTree = parentTree + nonAgentBaseCommit = parentCommitHash + } else if parentCommitHash != "" { + // parentCommitHash is set but parentTree is nil — parent object resolution + // failed (e.g. shallow clone, pack corruption). Fall back to session base + // for both tree and hash to keep them in sync. + logging.Warn(logging.WithComponent(ctx, "attribution"), + "attribution: parent tree unavailable despite parent hash being set; non-agent file counts may be inflated", + slog.String("parent_commit_hash", parentCommitHash), + ) + } + + allChangedFiles, err := getAllChangedFiles(ctx, nonAgentBaseTree, headTree, repoDir, nonAgentBaseCommit, headCommitHash) if err != nil { logging.Warn(logging.WithComponent(ctx, "attribution"), "attribution: failed to enumerate changed files", @@ -258,7 +281,7 @@ func CalculateAttributionWithAccumulated( continue // Skip agent-touched files } - baseContent := getFileContent(baseTree, filePath) + baseContent := getFileContent(nonAgentBaseTree, filePath) headContent := getFileContent(headTree, filePath) _, userAdded, _ := diffLines(baseContent, headContent) allUserEditsToNonAgentFiles += userAdded diff --git a/cmd/entire/cli/strategy/manual_commit_attribution_test.go b/cmd/entire/cli/strategy/manual_commit_attribution_test.go index 478c6a951..64ed3fa1b 100644 --- a/cmd/entire/cli/strategy/manual_commit_attribution_test.go +++ b/cmd/entire/cli/strategy/manual_commit_attribution_test.go @@ -3,6 +3,7 @@ package strategy import ( "context" "sort" + "strings" "testing" "github.com/go-git/go-git/v6/plumbing" @@ -256,6 +257,102 @@ func buildTestTree(t *testing.T, files map[string]string) *object.Tree { return treeObj } +func TestCalculateAttributionWithAccumulated_UsesParentBaselineForNonAgentFiles(t *testing.T) { + t.Parallel() + baseTree := buildTestTree(t, map[string]string{ + "agent-file.go": strings.Repeat("agent line\n", 10), + "unrelated.go": strings.Repeat("old unrelated\n", 5), + }) + + shadowTree := buildTestTree(t, map[string]string{ + "agent-file.go": strings.Repeat("agent line\n", 20), + "unrelated.go": strings.Repeat("old unrelated\n", 5), + }) + + headTree := buildTestTree(t, map[string]string{ + "agent-file.go": strings.Repeat("agent line\n", 20), + "unrelated.go": strings.Repeat("new unrelated\n", 20), + }) + + parentTree := buildTestTree(t, map[string]string{ + "agent-file.go": strings.Repeat("agent line\n", 10), + "unrelated.go": strings.Repeat("new unrelated\n", 20), + }) + + result := CalculateAttributionWithAccumulated( + context.Background(), + baseTree, + shadowTree, + headTree, + parentTree, + []string{"agent-file.go"}, + nil, + "", + "", + "", + "", + ) + + require.NotNil(t, result, "expected non-nil result") + + if result.AgentLines != 10 { + t.Errorf("AgentLines = %d, want 10", result.AgentLines) + } + if result.HumanAdded != 0 { + t.Errorf("HumanAdded = %d, want 0", result.HumanAdded) + } + if result.TotalCommitted != 10 { + t.Errorf("TotalCommitted = %d, want 10", result.TotalCommitted) + } + if result.AgentPercentage < 99.9 || result.AgentPercentage > 100.1 { + t.Errorf("AgentPercentage = %.1f%%, want 100.0%%", result.AgentPercentage) + } +} + +func TestCalculateAttributionWithAccumulated_NilParentTreeFallsBackToBase(t *testing.T) { + t.Parallel() + baseTree := buildTestTree(t, map[string]string{ + "agent-file.go": strings.Repeat("agent line\n", 5), + }) + + shadowTree := buildTestTree(t, map[string]string{ + "agent-file.go": strings.Repeat("agent line\n", 10), + }) + + headTree := buildTestTree(t, map[string]string{ + "agent-file.go": strings.Repeat("agent line\n", 10), + }) + + result := CalculateAttributionWithAccumulated( + context.Background(), + baseTree, + shadowTree, + headTree, + nil, + []string{"agent-file.go"}, + nil, + "", + "", + "", + "", + ) + + require.NotNil(t, result, "expected non-nil result") + + if result.AgentLines != 5 { + t.Errorf("AgentLines = %d, want 5", result.AgentLines) + } + if result.HumanAdded != 0 { + t.Errorf("HumanAdded = %d, want 0", result.HumanAdded) + } + if result.TotalCommitted != 5 { + t.Errorf("TotalCommitted = %d, want 5", result.TotalCommitted) + } + if result.AgentPercentage != 100.0 { + t.Errorf("AgentPercentage = %.1f%%, want 100.0%%", result.AgentPercentage) + } +} + // TestCalculateAttributionWithAccumulated_BasicCase tests the basic scenario // where the agent adds lines and the user makes some edits. // @@ -281,7 +378,7 @@ func TestCalculateAttributionWithAccumulated_BasicCase(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, nil, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -338,7 +435,7 @@ func TestCalculateAttributionWithAccumulated_BugScenario(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, nil, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -395,7 +492,7 @@ func TestCalculateAttributionWithAccumulated_DeletionOnly(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, nil, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -444,7 +541,7 @@ func TestCalculateAttributionWithAccumulated_NoUserEdits(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, nil, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -496,7 +593,7 @@ func TestCalculateAttributionWithAccumulated_NoAgentWork(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, nil, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -550,7 +647,7 @@ func TestCalculateAttributionWithAccumulated_UserRemovesAllAgentLines(t *testing result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, nil, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -619,7 +716,7 @@ func TestCalculateAttributionWithAccumulated_WithPromptAttributions(t *testing.T result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, nil, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -662,7 +759,7 @@ func TestCalculateAttributionWithAccumulated_EmptyFilesTouched(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, []string{}, []PromptAttribution{}, "", "", "", + baseTree, shadowTree, headTree, nil, []string{}, []PromptAttribution{}, "", "", "", "", ) if result != nil { @@ -716,7 +813,7 @@ func TestCalculateAttributionWithAccumulated_UserEditsNonAgentFile(t *testing.T) result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, nil, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -1021,7 +1118,7 @@ func TestCalculateAttributionWithAccumulated_UserSelfModification(t *testing.T) result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, nil, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -1092,7 +1189,7 @@ func TestCalculateAttributionWithAccumulated_MixedModifications(t *testing.T) { result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, nil, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -1173,7 +1270,7 @@ func TestCalculateAttributionWithAccumulated_UncommittedWorktreeFiles(t *testing result := CalculateAttributionWithAccumulated( context.Background(), - baseTree, shadowTree, headTree, filesTouched, promptAttributions, "", "", "", + baseTree, shadowTree, headTree, nil, filesTouched, promptAttributions, "", "", "", "", ) require.NotNil(t, result, "expected non-nil result") @@ -1237,3 +1334,134 @@ func TestCalculatePromptAttribution_PopulatesPerFile(t *testing.T) { t.Errorf("UserAddedPerFile[b.go] = %d, want 1", result.UserAddedPerFile["b.go"]) } } + +func TestComputeCombinedAttribution_EmptyInput_ReturnsNil(t *testing.T) { + t.Parallel() + result := computeCombinedAttribution(context.Background(), nil, nil) + if result != nil { + t.Errorf("expected nil for empty input, got %+v", result) + } +} + +func TestComputeCombinedAttribution_SingleSession_ReturnsNil(t *testing.T) { + t.Parallel() + sessions := []sessionAttrData{ + {promptAttributions: []PromptAttribution{{UserLinesAdded: 5}}}, + } + result := computeCombinedAttribution(context.Background(), nil, sessions) + if result != nil { + t.Errorf("expected nil for single session (no-op), got %+v", result) + } +} + +// TestBugC_DemonstrateFix shows the before/after for multi-session attribution. +// +// Scenario: +// - Base: shared.go with 5 lines (user-authored baseline) +// - User edits shared.go adding 3 lines → session 1 captures this as PromptAttribution +// - Session 1 agent: modifies shared.go to 15 lines (+7 agent lines on top of the 3 user lines) +// - Session 2 agent: adds new_feature.go with 8 lines (no user edits between sessions) +// - Both sessions share the same shadow branch (shadow has both files) +// - User commits once (head = shadow) +// +// BEFORE fix (latest session wins = session 2's empty PromptAttributions): +// +// The 3 user lines on shared.go are not subtracted → agent gets credit for them +// Result: agentLines=18, humanAdded=0 → 78% (OVERCOUNTS agent work) +// +// AFTER fix (combined PromptAttributions from both sessions): +// +// session 1's PromptAttribution (shared.go=3) is merged in → subtracted correctly +// Result: agentLines=15, humanAdded=3 → 83.3% (correct — user credit preserved) +func TestBugC_DemonstrateFix(t *testing.T) { + t.Parallel() + + // Base: shared.go with 5 lines (before any agent work) + baseTree := buildTestTree(t, map[string]string{ + "shared.go": "line1\nline2\nline3\nline4\nline5\n", + }) + + // Shadow: both sessions' agent work on the shared branch + // shared.go: base(5) + user(3) + agent-session1(7) = 15 lines + // new_feature.go: agent-session2 added 8 lines + shadowTree := buildTestTree(t, map[string]string{ + "shared.go": "line1\nline2\nline3\nline4\nline5\nuser1\nuser2\nuser3\nagent1\nagent2\nagent3\nagent4\nagent5\nagent6\nagent7\n", + "new_feature.go": "feat1\nfeat2\nfeat3\nfeat4\nfeat5\nfeat6\nfeat7\nfeat8\n", + }) + + // Head = shadow (agent committed directly, no user edits after last checkpoint) + headTree := shadowTree + + filesTouched := []string{"shared.go", "new_feature.go"} + + // Session 1 PromptAttributions: captured user adding 3 lines to shared.go + // before session 1 started (user wrote user1, user2, user3) + session1PAs := []PromptAttribution{ + { + CheckpointNumber: 1, + UserLinesAdded: 3, + UserLinesRemoved: 0, + UserAddedPerFile: map[string]int{"shared.go": 3}, + }, + } + + // Session 2 PromptAttributions: no user edits between sessions + session2PAs := []PromptAttribution{} + + ctx := context.Background() + + // ── BEFORE fix: latest session wins (session 2's empty PromptAttributions) ── + before := CalculateAttributionWithAccumulated( + ctx, + baseTree, shadowTree, headTree, nil, + filesTouched, session2PAs, // session 2 only — latest wins + "", "", "", "", + ) + + // ── AFTER fix: combined PromptAttributions (session 1 + session 2) ── + combinedPAs := make([]PromptAttribution, 0, len(session1PAs)+len(session2PAs)) + combinedPAs = append(combinedPAs, session1PAs...) + combinedPAs = append(combinedPAs, session2PAs...) + after := CalculateAttributionWithAccumulated( + ctx, + baseTree, shadowTree, headTree, nil, + filesTouched, combinedPAs, // both sessions merged + "", "", "", "", + ) + + t.Logf("── Scenario ─────────────────────────────────────────────────") + t.Logf(" base: shared.go = 5 lines (original file)") + t.Logf(" user wrote 3 lines to shared.go before session 1") + t.Logf(" session1 agent: added 7 lines to shared.go (shared.go now 15 lines total)") + t.Logf(" session2 agent: created new_feature.go with 8 lines") + t.Logf(" user commits once → head has shared.go(15) + new_feature.go(8) = 23 lines total") + t.Logf("") + t.Logf(" session1 PromptAttributions: user_added=3 on shared.go") + t.Logf(" session2 PromptAttributions: (empty — no user edits between sessions)") + t.Logf("") + t.Logf("── BEFORE (latest session wins = only session2 PromptAttributions used) ──") + t.Logf(" session2 has no record of the 3 user lines → they get credited to agent") + t.Logf(" agentLines=%d humanAdded=%d totalCommitted=%d agentPct=%.1f%%", + before.AgentLines, before.HumanAdded, before.TotalCommitted, before.AgentPercentage) + t.Logf("") + t.Logf("── AFTER (combined = session1 + session2 PromptAttributions merged) ─────") + t.Logf(" session1's 3 user lines on shared.go are merged in and subtracted correctly") + t.Logf(" agentLines=%d humanAdded=%d totalCommitted=%d agentPct=%.1f%%", + after.AgentLines, after.HumanAdded, after.TotalCommitted, after.AgentPercentage) + + // BEFORE: agent gets credit for the 3 user lines on shared.go (overcounts) + if before.AgentLines != 18 { + t.Errorf("BEFORE: AgentLines = %d, want 18 (10 shared + 8 new_feature, user lines not subtracted)", before.AgentLines) + } + if before.HumanAdded != 0 { + t.Errorf("BEFORE: HumanAdded = %d, want 0 (user contribution lost with latest-wins)", before.HumanAdded) + } + + // AFTER: user's 3 lines correctly attributed to human + if after.AgentLines != 15 { + t.Errorf("AFTER: AgentLines = %d, want 15 (7 shared + 8 new_feature)", after.AgentLines) + } + if after.HumanAdded != 3 { + t.Errorf("AFTER: HumanAdded = %d, want 3 (user wrote 3 lines before session 1)", after.HumanAdded) + } +} diff --git a/cmd/entire/cli/strategy/manual_commit_condensation.go b/cmd/entire/cli/strategy/manual_commit_condensation.go index 608482151..275a8d324 100644 --- a/cmd/entire/cli/strategy/manual_commit_condensation.go +++ b/cmd/entire/cli/strategy/manual_commit_condensation.go @@ -85,12 +85,115 @@ func (s *ManualCommitStrategy) getCheckpointLog(ctx context.Context, checkpointI return content.Transcript, nil } +// sessionAttrData captures per-session data needed to compute combined attribution +// after the condensation loop has cleared state.PromptAttributions. +type sessionAttrData struct { + condenseOpts // Embedded: shadowRef, headTree, parentTree, repoDir, headCommitHash, parentCommitHash + + promptAttributions []PromptAttribution + filesTouched []string + attrBase string +} + +// computeCombinedAttribution merges PromptAttributions from all sessions sharing +// a shadow branch and computes attribution once with the merged data. +// Returns nil for fewer than 2 sessions — single-session checkpoints don't need combining. +func computeCombinedAttribution( + ctx context.Context, + repo *git.Repository, + sessions []sessionAttrData, +) *cpkg.InitialAttribution { + if len(sessions) < 2 { + return nil + } + + var merged []PromptAttribution + seenFiles := make(map[string]struct{}) + var allFilesTouched []string + for _, s := range sessions { + merged = append(merged, s.promptAttributions...) + for _, f := range s.filesTouched { + if _, ok := seenFiles[f]; !ok { + seenFiles[f] = struct{}{} + allFilesTouched = append(allFilesTouched, f) + } + } + } + + first := sessions[0] + attrBase := first.attrBase + for i := 1; i < len(sessions); i++ { + if sessions[i].attrBase != attrBase { + logging.Warn(logging.WithComponent(ctx, "attribution"), + "combined attribution: sessions have divergent attribution base commits; using first session's base", + slog.String("first_base", attrBase), + slog.String("divergent_base", sessions[i].attrBase), + slog.Int("session_index", i), + ) + break + } + } + syntheticState := &SessionState{ + PromptAttributions: merged, + AttributionBaseCommit: attrBase, + BaseCommit: attrBase, + } + syntheticData := &ExtractedSessionData{ + FilesTouched: allFilesTouched, + } + + return calculateSessionAttributions(ctx, repo, first.shadowRef, syntheticData, syntheticState, attributionOpts{ + headTree: first.headTree, + parentTree: first.parentTree, + repoDir: first.repoDir, + attributionBaseCommit: first.attrBase, + headCommitHash: first.headCommitHash, + parentCommitHash: first.parentCommitHash, + }) +} + +// resolveCommitContext extracts HEAD tree, parent tree, and parent commit hash +// from a commit object. Best-effort: warns on failure and leaves fields nil/empty. +// Both parentTree and parentCommitHash are set together or not at all, so callers +// can rely on them being in sync. +func resolveCommitContext(ctx context.Context, commit *object.Commit) (headTree, parentTree *object.Tree, parentCommitHash string) { + logCtx := logging.WithComponent(ctx, "checkpoint") + + if t, err := commit.Tree(); err != nil { + logging.Warn(logCtx, "failed to resolve HEAD tree; attribution will be skipped", + slog.String("commit", commit.Hash.String()), + slog.String("error", err.Error())) + } else { + headTree = t + } + + if commit.NumParents() > 0 { + rawHash := commit.ParentHashes[0] + if parent, err := commit.Parent(0); err != nil { + logging.Warn(logCtx, "failed to load parent commit; parent-scoped attribution unavailable", + slog.String("parent_hash", rawHash.String()), + slog.String("error", err.Error())) + } else if t, err := parent.Tree(); err != nil { + logging.Warn(logCtx, "failed to load parent tree; parent-scoped attribution unavailable", + slog.String("parent_hash", rawHash.String()), + slog.String("error", err.Error())) + } else { + parentTree = t + parentCommitHash = rawHash.String() + } + } + + return headTree, parentTree, parentCommitHash +} + // condenseOpts provides pre-resolved git objects to avoid redundant reads. type condenseOpts struct { - shadowRef *plumbing.Reference // Pre-resolved shadow branch ref (nil = resolve from repo) - headTree *object.Tree // Pre-resolved HEAD tree (passed through to calculateSessionAttributions) - repoDir string // Repository worktree path for git CLI commands - headCommitHash string // HEAD commit hash (passed through for attribution) + shadowRef *plumbing.Reference // Pre-resolved shadow branch ref (nil = resolve from repo) + headTree *object.Tree // Pre-resolved HEAD tree (passed through to calculateSessionAttributions) + repoDir string // Repository worktree path for git CLI commands + headCommitHash string // HEAD commit hash (passed through for attribution) + parentTree *object.Tree // HEAD's first parent tree (nil when parentCommitHash is empty or parent resolution failed) + parentCommitHash string // HEAD's first parent hash (empty when parentTree is nil — initial commit or resolution failure) } // CondenseSession condenses a session's shadow branch to permanent storage. @@ -198,6 +301,8 @@ func (s *ManualCommitStrategy) CondenseSession(ctx context.Context, repo *git.Re repoDir: o.repoDir, attributionBaseCommit: attrBase, headCommitHash: o.headCommitHash, + parentTree: o.parentTree, + parentCommitHash: o.parentCommitHash, }) // Get current branch name @@ -335,6 +440,8 @@ type attributionOpts struct { repoDir string // Repository worktree path for git CLI commands attributionBaseCommit string // Base commit hash for non-agent file detection (empty = fall back to go-git tree walk) headCommitHash string // HEAD commit hash for non-agent file detection (empty = fall back to go-git tree walk) + parentTree *object.Tree // HEAD's first parent tree for non-agent file detection + parentCommitHash string // HEAD's first parent hash for non-agent file detection } func calculateSessionAttributions(ctx context.Context, repo *git.Repository, shadowRef *plumbing.Reference, sessionData *ExtractedSessionData, state *SessionState, opts ...attributionOpts) *cpkg.InitialAttribution { @@ -441,11 +548,13 @@ func calculateSessionAttributions(ctx context.Context, repo *git.Repository, sha baseTree, shadowTree, headTree, + o.parentTree, sessionData.FilesTouched, state.PromptAttributions, o.repoDir, o.attributionBaseCommit, o.headCommitHash, + o.parentCommitHash, ) if attribution != nil { @@ -839,8 +948,28 @@ func (s *ManualCommitStrategy) CondenseSessionByID(ctx context.Context, sessionI return nil } + // Resolve HEAD commit context for attribution scoping. Best-effort: falls back + // gracefully if HEAD resolution fails (e.g. bare repo, detached HEAD edge cases). + var headCommitOpts condenseOpts + if repoDir, rdErr := paths.WorktreeRoot(ctx); rdErr == nil { + headCommitOpts.repoDir = repoDir + } + if headRef, hrErr := repo.Head(); hrErr != nil { + logging.Warn(logCtx, "condense-by-id: failed to resolve HEAD; attribution context unavailable", + slog.String("error", hrErr.Error())) + } else { + headCommitOpts.headCommitHash = headRef.Hash().String() + if headCommit, hcErr := repo.CommitObject(headRef.Hash()); hcErr != nil { + logging.Warn(logCtx, "condense-by-id: failed to load HEAD commit object", + slog.String("commit", headRef.Hash().String()), + slog.String("error", hcErr.Error())) + } else { + headCommitOpts.headTree, headCommitOpts.parentTree, headCommitOpts.parentCommitHash = resolveCommitContext(ctx, headCommit) + } + } + // Condense the session - result, err := s.CondenseSession(ctx, repo, checkpointID, state, nil) + result, err := s.CondenseSession(ctx, repo, checkpointID, state, nil, headCommitOpts) if err != nil { return fmt.Errorf("failed to condense session: %w", err) } diff --git a/cmd/entire/cli/strategy/manual_commit_hooks.go b/cmd/entire/cli/strategy/manual_commit_hooks.go index c90c3bd6e..dfc7fa635 100644 --- a/cmd/entire/cli/strategy/manual_commit_hooks.go +++ b/cmd/entire/cli/strategy/manual_commit_hooks.go @@ -626,10 +626,11 @@ type postCommitActionHandler struct { // Cached git objects — resolved once per PostCommit invocation to avoid // redundant reads across filesOverlapWithContent, filesWithRemainingAgentChanges, // CondenseSession, and calculateSessionAttributions. - headTree *object.Tree // HEAD commit tree (shared across all sessions) - parentTree *object.Tree // HEAD's first parent tree (shared, nil for initial commits) - shadowRef *plumbing.Reference // Per-session shadow branch ref (nil if branch doesn't exist) - shadowTree *object.Tree // Per-session shadow commit tree (nil if branch doesn't exist) + headTree *object.Tree // HEAD commit tree (shared across all sessions) + parentTree *object.Tree // HEAD's first parent tree (nil for initial commits or resolution failure) + parentCommitHash string // HEAD's first parent hash (empty iff parentTree is nil) + shadowRef *plumbing.Reference // Per-session shadow branch ref (nil if branch doesn't exist) + shadowTree *object.Tree // Per-session shadow commit tree (nil if branch doesn't exist) // Output: set by handler methods, read by caller after TransitionAndLog. condensed bool @@ -649,10 +650,12 @@ func (h *postCommitActionHandler) HandleCondense(state *session.State) error { if shouldCondense { h.condensed = h.s.condenseAndUpdateState(h.ctx, h.repo, h.checkpointID, state, h.head, h.shadowBranchName, h.shadowBranchesToDelete, h.committedFileSet, condenseOpts{ - shadowRef: h.shadowRef, - headTree: h.headTree, - repoDir: h.repoDir, - headCommitHash: h.newHead, + shadowRef: h.shadowRef, + headTree: h.headTree, + repoDir: h.repoDir, + headCommitHash: h.newHead, + parentTree: h.parentTree, + parentCommitHash: h.parentCommitHash, }) } else { h.s.updateBaseCommitIfChanged(h.ctx, state, h.newHead) @@ -675,10 +678,12 @@ func (h *postCommitActionHandler) HandleCondenseIfFilesTouched(state *session.St if shouldCondense { h.condensed = h.s.condenseAndUpdateState(h.ctx, h.repo, h.checkpointID, state, h.head, h.shadowBranchName, h.shadowBranchesToDelete, h.committedFileSet, condenseOpts{ - shadowRef: h.shadowRef, - headTree: h.headTree, - repoDir: h.repoDir, - headCommitHash: h.newHead, + shadowRef: h.shadowRef, + headTree: h.headTree, + repoDir: h.repoDir, + headCommitHash: h.newHead, + parentTree: h.parentTree, + parentCommitHash: h.parentCommitHash, }) } else { h.s.updateBaseCommitIfChanged(h.ctx, state, h.newHead) @@ -841,22 +846,16 @@ func (s *ManualCommitStrategy) PostCommit(ctx context.Context) error { //nolint: // per-session functions (filesOverlapWithContent, filesWithRemainingAgentChanges, // calculateSessionAttributions). _, resolveTreesSpan := perf.Start(ctx, "resolve_commit_trees") - var headTree *object.Tree - if t, err := commit.Tree(); err == nil { - headTree = t - } - var parentTree *object.Tree - if commit.NumParents() > 0 { - if parent, err := commit.Parent(0); err == nil { - if t, err := parent.Tree(); err == nil { - parentTree = t - } - } - } + headTree, parentTree, parentCommitHash := resolveCommitContext(ctx, commit) committedFileSet := filesChangedInCommit(ctx, worktreePath, commit, headTree, parentTree) resolveTreesSpan.End() + // attrAccumulator collects per-session data keyed by shadow branch name. + // PromptAttributions are captured before condensation clears them (Pass 1), + // then used in Pass 2 to compute combined attribution for multi-session checkpoints. + attrAccumulator := make(map[string][]sessionAttrData) + loopCtx, processSessionsLoop := perf.StartLoop(ctx, "process_sessions") for _, state := range sessions { // Skip fully-condensed ended sessions — no work remains. @@ -866,12 +865,45 @@ func (s *ManualCommitStrategy) PostCommit(ctx context.Context) error { //nolint: } iterCtx, iterSpan := processSessionsLoop.Iteration(loopCtx) s.postCommitProcessSession(iterCtx, repo, state, &transitionCtx, checkpointID, - head, commit, newHead, worktreePath, headTree, parentTree, committedFileSet, - shadowBranchesToDelete, uncondensedActiveOnBranch) + head, commit, newHead, worktreePath, headTree, parentTree, parentCommitHash, committedFileSet, + shadowBranchesToDelete, uncondensedActiveOnBranch, attrAccumulator) iterSpan.End() } processSessionsLoop.End() + // Pass 2: compute combined attribution for multi-session checkpoints. + // Runs after all condensation so the already-written CheckpointSummary can be patched. + _, combinedAttrSpan := perf.Start(ctx, "combined_attribution") + for _, sessionAttrs := range attrAccumulator { + if len(sessionAttrs) < 2 { + continue // Single session: per-session attribution already correct + } + combined := computeCombinedAttribution(ctx, repo, sessionAttrs) + if combined == nil { + continue + } + store, storeErr := s.getCheckpointStore() + if storeErr != nil { + logging.Warn(logCtx, "combined attribution: failed to get store", + slog.String("error", storeErr.Error())) + continue + } + if updateErr := store.UpdateCheckpointSummary(ctx, checkpoint.UpdateCheckpointSummaryOptions{ + CheckpointID: checkpointID, + CombinedAttribution: combined, + }); updateErr != nil { + logging.Warn(logCtx, "combined attribution: failed to update summary", + slog.String("checkpoint_id", checkpointID.String()), + slog.String("error", updateErr.Error())) + // Non-fatal: per-session attributions are still correct. + } else { + logging.Info(logCtx, "combined attribution written", + slog.String("checkpoint_id", checkpointID.String()), + slog.Int("sessions_merged", len(sessionAttrs))) + } + } + combinedAttrSpan.End() + // Clean up shadow branches — only delete when ALL sessions on the branch are non-active // or were condensed during this PostCommit. _, cleanupBranchesSpan := perf.Start(ctx, "cleanup_shadow_branches") @@ -912,9 +944,11 @@ func (s *ManualCommitStrategy) postCommitProcessSession( newHead string, repoDir string, headTree, parentTree *object.Tree, + parentCommitHash string, committedFileSet map[string]struct{}, shadowBranchesToDelete map[string]struct{}, uncondensedActiveOnBranch map[string]bool, + attrAccumulator map[string][]sessionAttrData, ) { logCtx := logging.WithComponent(ctx, "checkpoint") shadowBranchName := getShadowBranchNameForCommit(state.BaseCommit, state.WorktreeID) @@ -985,6 +1019,29 @@ func (s *ManualCommitStrategy) postCommitProcessSession( slog.Any("files", filesTouchedBefore), ) + // Snapshot PromptAttributions BEFORE condensation clears them. + // condenseAndUpdateState sets state.PromptAttributions = nil, so we must + // capture them here. The snapshot is only committed to attrAccumulator after + // we confirm condensation occurred (below), to avoid computing combined + // attribution for sessions that weren't actually condensed. + attrBase := state.AttributionBaseCommit + if attrBase == "" { + attrBase = state.BaseCommit + } + snapshotAttr := sessionAttrData{ + condenseOpts: condenseOpts{ + shadowRef: shadowRef, + headTree: headTree, + parentTree: parentTree, + repoDir: repoDir, + headCommitHash: newHead, + parentCommitHash: parentCommitHash, + }, + promptAttributions: append([]PromptAttribution(nil), state.PromptAttributions...), + filesTouched: append([]string(nil), filesTouchedBefore...), + attrBase: attrBase, + } + // Run the state machine transition with handler for strategy-specific actions. _, transitionAndCondenseSpan := perf.Start(ctx, "transition_and_condense") handler := &postCommitActionHandler{ @@ -1003,6 +1060,7 @@ func (s *ManualCommitStrategy) postCommitProcessSession( filesTouchedBefore: filesTouchedBefore, headTree: headTree, parentTree: parentTree, + parentCommitHash: parentCommitHash, shadowRef: shadowRef, shadowTree: shadowTree, } @@ -1013,6 +1071,13 @@ func (s *ManualCommitStrategy) postCommitProcessSession( } transitionAndCondenseSpan.End() + // Only include this session in the combined attribution accumulator if it + // was actually condensed. Non-condensed sessions haven't written their data + // to the checkpoint yet, so including them would produce premature results. + if handler.condensed { + attrAccumulator[shadowBranchName] = append(attrAccumulator[shadowBranchName], snapshotAttr) + } + // Record checkpoint ID for ACTIVE sessions so HandleTurnEnd can finalize // with full transcript. IDLE/ENDED sessions already have complete transcripts. // NOTE: This check runs AFTER TransitionAndLog updated the phase. It relies on diff --git a/cmd/entire/cli/strategy/manual_commit_test.go b/cmd/entire/cli/strategy/manual_commit_test.go index 4b4e9294e..637433862 100644 --- a/cmd/entire/cli/strategy/manual_commit_test.go +++ b/cmd/entire/cli/strategy/manual_commit_test.go @@ -3875,3 +3875,121 @@ func TestResolveFilesTouched_PrefersStateFallsBackToTranscript(t *testing.T) { } }) } + +// TestBugB_Candidate1_NoShadowSingleCheckpoint is a focused repro candidate for the +// single-session 0% AI bug (Bug B). This candidate targets the simplest no-shadow / +// first-checkpoint path: agent creates a new file from scratch, no user edits, no +// PromptAttributions. If this fails with AgentLines==0 or AgentPercentage==0, the +// no-shadow path is implicated. +// +// See docs/architecture/attribution.md for investigation notes. +func TestBugB_Candidate1_NoShadowSingleCheckpoint(t *testing.T) { + t.Parallel() + + agentContent := strings.Repeat("agent line\n", 10) + + baseTree := buildTestTree(t, map[string]string{}) + shadowTree := buildTestTree(t, map[string]string{ + "agent.go": agentContent, + }) + headTree := buildTestTree(t, map[string]string{ + "agent.go": agentContent, + }) + + // No-shadow path: use HEAD as shadow (shadowTree == headTree). + // No PromptAttributions (agent started on a clean worktree). + result := CalculateAttributionWithAccumulated( + context.Background(), + baseTree, + shadowTree, + headTree, + nil, + []string{"agent.go"}, + nil, + "", "", "", "", + ) + + if result == nil { + t.Fatal("Bug B candidate 1: InitialAttribution is nil — no shadow path returned nil unexpectedly") + } + if result.AgentLines == 0 { + t.Errorf("Bug B candidate 1: AgentLines = 0, want > 0 (agent created all content)") + } + if result.AgentPercentage == 0 { + t.Errorf("Bug B candidate 1: AgentPercentage = 0, want > 0") + } + + t.Logf("Bug B candidate 1 result: agent=%d, human_added=%d, total=%d, pct=%.1f%%", + result.AgentLines, result.HumanAdded, result.TotalCommitted, result.AgentPercentage) +} + +// TestBugB_Candidate2_InflatedPromptAttribution is the second focused repro candidate +// for Bug B. This models the scenario where the user had many lines pre-written in the +// worktree at session start (captured in PromptAttributions), and the agent then rewrote +// the file. The inflated accumulatedToAgentFiles can exceed totalAgentAndUserWork, +// clamping totalAgentAdded to zero and producing 0% AI despite real agent work. +// +// Scenario: +// - base: file.go has 10 lines +// - user pre-wrote 15 extra lines before session: worktree = 25 lines +// - PromptAttribution captures 15 user lines for file.go +// - agent rewrites the file: shadow = 20 lines (net +10 from base) +// - head: same as shadow (no post-checkpoint user edits) +// - totalAgentAndUserWork = base→shadow = +10 +// - accumulatedToAgentFiles = 15 +// - totalAgentAdded = max(0, 10-15) = 0 → 0% AI (BUG if this fails) +// +// See docs/architecture/attribution.md for investigation notes. +func TestBugB_Candidate2_InflatedPromptAttribution(t *testing.T) { + t.Parallel() + t.Skip("Bug B open investigation: inflated PromptAttribution zeroes agent credit when user pre-writes lines in agent-touched file before session start. Repro confirmed — fix requires gross-addition tracking. See docs/architecture/attribution.md.") + + baseLines := strings.Repeat("base line\n", 10) + shadowLines := baseLines + strings.Repeat("agent line\n", 10) + // head == shadow: no post-checkpoint user edits + headLines := shadowLines + + baseTree := buildTestTree(t, map[string]string{"file.go": baseLines}) + shadowTree := buildTestTree(t, map[string]string{"file.go": shadowLines}) + headTree := buildTestTree(t, map[string]string{"file.go": headLines}) + + // User pre-wrote 15 lines into file.go before the session started. + // (In a real session, calculatePromptAttributionAtStart would capture these.) + promptAttributions := []PromptAttribution{ + { + CheckpointNumber: 1, + UserLinesAdded: 15, + UserLinesRemoved: 0, + UserAddedPerFile: map[string]int{"file.go": 15}, + }, + } + + result := CalculateAttributionWithAccumulated( + context.Background(), + baseTree, + shadowTree, + headTree, + nil, + []string{"file.go"}, + promptAttributions, + "", "", "", "", + ) + + if result == nil { + t.Fatal("Bug B candidate 2: InitialAttribution is nil") + } + + t.Logf("Bug B candidate 2 result: agent=%d, human_added=%d, total=%d, pct=%.1f%%", + result.AgentLines, result.HumanAdded, result.TotalCommitted, result.AgentPercentage) + + // Expected correct behavior: agent added 10 lines (base→shadow diff = +10), + // so AgentLines should be > 0 and AgentPercentage should be > 0. + // If this test fails, the PromptAttribution inflation path is a deterministic + // repro for Bug B. + if result.AgentLines == 0 { + t.Errorf("Bug B candidate 2: AgentLines = 0, want > 0 — inflated PromptAttribution is zeroing agent credit") + } + if result.AgentPercentage == 0 { + t.Errorf("Bug B candidate 2: AgentPercentage = 0, want > 0") + } +}