Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions cmd/entire/cli/checkpoint/checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,11 @@ type Store interface {
// session transcript (prompt to stop event).
// Returns ErrCheckpointNotFound if the checkpoint doesn't exist.
UpdateCommitted(ctx context.Context, opts UpdateCommittedOptions) error

// UpdateCheckpointSummary patches the root CheckpointSummary for an existing checkpoint.
// Used after all sessions are condensed to store combined multi-session attribution.
// Returns ErrCheckpointNotFound if the checkpoint does not exist.
UpdateCheckpointSummary(ctx context.Context, opts UpdateCheckpointSummaryOptions) error
}

// WriteTemporaryResult contains the result of writing a temporary checkpoint.
Expand Down Expand Up @@ -312,6 +317,15 @@ type UpdateCommittedOptions struct {
Agent types.AgentType
}

// UpdateCheckpointSummaryOptions holds options for patching the root CheckpointSummary.
// It is the argument type of UpdateCheckpointSummary.
type UpdateCheckpointSummaryOptions struct {
	// CheckpointID identifies the checkpoint whose root metadata.json to update.
	CheckpointID id.CheckpointID

	// CombinedAttribution is the merged attribution computed across all sessions.
	// It is a pointer, so it may be nil.
	CombinedAttribution *InitialAttribution
}

// CommittedInfo contains summary information about a committed checkpoint.
type CommittedInfo struct {
// CheckpointID is the stable 12-hex-char identifier
Expand Down Expand Up @@ -451,6 +465,10 @@ type CheckpointSummary struct {
FilesTouched []string `json:"files_touched"`
Sessions []SessionFilePaths `json:"sessions"`
TokenUsage *agent.TokenUsage `json:"token_usage,omitempty"`
// CombinedAttribution is computed once across all sessions sharing a shadow branch.
// Only present for multi-session checkpoints written by CLI v0.x+.
// When present, prefer this over per-session InitialAttribution in aggregation.
CombinedAttribution *InitialAttribution `json:"combined_attribution,omitempty"`
}

// SessionMetrics contains hook-provided session metrics from agents that report
Expand Down
76 changes: 76 additions & 0 deletions cmd/entire/cli/checkpoint/committed.go
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,82 @@ func (s *GitStore) UpdateSummary(ctx context.Context, checkpointID id.Checkpoint
return nil
}

// UpdateCheckpointSummary rewrites the root-level metadata.json (CheckpointSummary)
// of an existing checkpoint so that it carries opts.CombinedAttribution, then
// records the new tree as a commit and points the paths.MetadataBranchName
// branch at it.
//
// A nil opts.CombinedAttribution is a no-op: the function returns nil before
// the checkpoint is looked up, so no existence check happens in that case.
// Returns ErrCheckpointNotFound when the checkpoint has no root metadata.json.
func (s *GitStore) UpdateCheckpointSummary(ctx context.Context, opts UpdateCheckpointSummaryOptions) error {
	// Nothing to patch; bail out before touching the repository.
	if opts.CombinedAttribution == nil {
		return nil
	}

	if ctxErr := ctx.Err(); ctxErr != nil {
		return ctxErr //nolint:wrapcheck // Propagating context cancellation
	}

	if err := s.ensureSessionsBranch(); err != nil {
		return fmt.Errorf("failed to ensure sessions branch: %w", err)
	}

	parentHash, rootTreeHash, err := s.getSessionsBranchRef()
	if err != nil {
		return err
	}

	// Derive every path from a single Path() call.
	checkpointPath := opts.CheckpointID.Path()
	basePath := checkpointPath + "/"
	metadataPath := basePath + paths.MetadataFileName

	entries, err := s.flattenCheckpointEntries(rootTreeHash, checkpointPath)
	if err != nil {
		return err
	}

	// The root metadata.json entry is what marks the checkpoint as existing.
	existing, found := entries[metadataPath]
	if !found {
		return ErrCheckpointNotFound
	}

	summary, err := s.readSummaryFromBlob(existing.Hash)
	if err != nil {
		return fmt.Errorf("failed to read checkpoint summary: %w", err)
	}

	// Only this one field is changed; the rest of the summary is re-serialized as read.
	summary.CombinedAttribution = opts.CombinedAttribution

	encoded, err := jsonutil.MarshalIndentWithNewline(summary, "", " ")
	if err != nil {
		return fmt.Errorf("failed to marshal summary: %w", err)
	}

	blobHash, err := CreateBlobFromContent(s.repo, encoded)
	if err != nil {
		return fmt.Errorf("failed to create summary blob: %w", err)
	}

	// Swap the metadata entry for the freshly written blob.
	entries[metadataPath] = object.TreeEntry{
		Name: metadataPath,
		Mode: filemode.Regular,
		Hash: blobHash,
	}

	newTreeHash, err := s.spliceCheckpointSubtree(rootTreeHash, opts.CheckpointID, basePath, entries)
	if err != nil {
		return err
	}

	commitMsg := fmt.Sprintf("Checkpoint: %s\n\nEntire-Combined-Attribution: true", opts.CheckpointID)
	authorName, authorEmail := GetGitAuthorFromRepo(s.repo)
	newCommitHash, err := s.createCommit(newTreeHash, parentHash, commitMsg, authorName, authorEmail)
	if err != nil {
		return err
	}

	// Advance the metadata branch to the new commit.
	branchRef := plumbing.NewHashReference(
		plumbing.NewBranchReferenceName(paths.MetadataBranchName),
		newCommitHash,
	)
	if err := s.repo.Storer.SetReference(branchRef); err != nil {
		return fmt.Errorf("failed to set branch reference: %w", err)
	}

	return nil
}

// UpdateCommitted replaces the transcript, prompts, and context for an existing
// committed checkpoint. Uses replace semantics: the full session transcript is
// written, replacing whatever was stored at initial condensation time.
Expand Down
117 changes: 117 additions & 0 deletions cmd/entire/cli/checkpoint/committed_update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -580,5 +580,122 @@ func TestGetGitAuthorFromRepo_NoConfig(t *testing.T) {
}
}

// TestUpdateCheckpointSummary_SetsCombinedAttribution writes a combined
// attribution through UpdateCheckpointSummary and checks that ReadCommitted
// returns the same values afterwards.
func TestUpdateCheckpointSummary_SetsCombinedAttribution(t *testing.T) {
	t.Parallel()
	_, store, cpID := setupRepoForUpdate(t)
	ctx := context.Background()

	opts := UpdateCheckpointSummaryOptions{
		CheckpointID: cpID,
		CombinedAttribution: &InitialAttribution{
			AgentLines:      15,
			HumanAdded:      3,
			HumanModified:   1,
			HumanRemoved:    0,
			TotalCommitted:  18,
			AgentPercentage: 83.3,
		},
	}
	if err := store.UpdateCheckpointSummary(ctx, opts); err != nil {
		t.Fatalf("UpdateCheckpointSummary() error = %v", err)
	}

	summary, err := store.ReadCommitted(ctx, cpID)
	if err != nil {
		t.Fatalf("ReadCommitted() error = %v", err)
	}

	got := summary.CombinedAttribution
	if got == nil {
		t.Fatal("CombinedAttribution is nil after update")
	}
	if got.AgentLines != 15 {
		t.Errorf("AgentLines = %d, want 15", got.AgentLines)
	}
	if got.HumanAdded != 3 {
		t.Errorf("HumanAdded = %d, want 3", got.HumanAdded)
	}
	if got.AgentPercentage != 83.3 {
		t.Errorf("AgentPercentage = %.1f, want 83.3", got.AgentPercentage)
	}
}

// TestUpdateCheckpointSummary_NonexistentCheckpoint checks that patching a
// checkpoint ID that was never written reports an error. It only asserts that
// the error is non-nil; it does not pin the specific sentinel.
func TestUpdateCheckpointSummary_NonexistentCheckpoint(t *testing.T) {
	t.Parallel()
	_, store, _ := setupRepoForUpdate(t)

	missing := UpdateCheckpointSummaryOptions{
		CheckpointID:        id.MustCheckpointID("deadbeef1234"),
		CombinedAttribution: &InitialAttribution{AgentLines: 10},
	}
	if err := store.UpdateCheckpointSummary(context.Background(), missing); err == nil {
		t.Fatal("expected error for nonexistent checkpoint, got nil")
	}
}

// TestUpdateCheckpointSummary_PreservesExistingFields reads the summary before
// and after a patch and checks that CheckpointID, Strategy and the number of
// sessions are unchanged while CombinedAttribution becomes non-nil.
func TestUpdateCheckpointSummary_PreservesExistingFields(t *testing.T) {
	t.Parallel()
	_, store, cpID := setupRepoForUpdate(t)
	ctx := context.Background()

	before, err := store.ReadCommitted(ctx, cpID)
	if err != nil {
		t.Fatalf("ReadCommitted() before error = %v", err)
	}

	patch := UpdateCheckpointSummaryOptions{
		CheckpointID: cpID,
		CombinedAttribution: &InitialAttribution{
			AgentLines:      10,
			TotalCommitted:  10,
			AgentPercentage: 100.0,
		},
	}
	if err = store.UpdateCheckpointSummary(ctx, patch); err != nil {
		t.Fatalf("UpdateCheckpointSummary() error = %v", err)
	}

	after, err := store.ReadCommitted(ctx, cpID)
	if err != nil {
		t.Fatalf("ReadCommitted() after error = %v", err)
	}

	if after.CheckpointID != before.CheckpointID {
		t.Errorf("CheckpointID changed: %q -> %q", before.CheckpointID, after.CheckpointID)
	}
	if after.Strategy != before.Strategy {
		t.Errorf("Strategy changed: %q -> %q", before.Strategy, after.Strategy)
	}
	if nBefore, nAfter := len(before.Sessions), len(after.Sessions); nAfter != nBefore {
		t.Errorf("Sessions count changed: %d -> %d", nBefore, nAfter)
	}
	if after.CombinedAttribution == nil {
		t.Fatal("CombinedAttribution should be set after update")
	}
}

// TestUpdateCheckpointSummary_NilCombinedAttributionIsNoop checks that calling
// UpdateCheckpointSummary with a nil CombinedAttribution returns no error and
// leaves the stored summary's CombinedAttribution nil.
func TestUpdateCheckpointSummary_NilCombinedAttributionIsNoop(t *testing.T) {
	t.Parallel()
	_, store, cpID := setupRepoForUpdate(t)
	ctx := context.Background()

	// A nil attribution must be accepted without error.
	noop := UpdateCheckpointSummaryOptions{
		CheckpointID:        cpID,
		CombinedAttribution: nil,
	}
	if err := store.UpdateCheckpointSummary(ctx, noop); err != nil {
		t.Fatalf("UpdateCheckpointSummary(nil) error = %v", err)
	}

	summary, err := store.ReadCommitted(ctx, cpID)
	if err != nil {
		t.Fatalf("ReadCommitted() error = %v", err)
	}
	if got := summary.CombinedAttribution; got != nil {
		t.Errorf("CombinedAttribution should remain nil, got %+v", got)
	}
}

// Verify go-git config import is used (compile-time check).
var _ = config.GlobalScope
72 changes: 72 additions & 0 deletions cmd/entire/cli/integration_test/attribution_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package integration

import (
"encoding/json"
"strings"
"testing"

"github.com/entireio/cli/cmd/entire/cli/checkpoint"
Expand Down Expand Up @@ -516,6 +517,77 @@ func TestManualCommit_AttributionNoDoubleCount(t *testing.T) {
}
}

// TestManualCommit_Attribution_IntermediateCommit checks that a file committed
// without hooks between the agent's stop event and the attributed commit is
// not counted in that commit's attribution.
func TestManualCommit_Attribution_IntermediateCommit(t *testing.T) {
	t.Parallel()
	env := NewTestEnv(t)
	defer env.Cleanup()

	// Seed the repository with one committed file, then enable Entire.
	env.InitRepo()
	env.WriteFile("existing.go", "package main\n")
	env.GitAdd("existing.go")
	env.GitCommit("Initial commit")
	env.InitEntire()

	// Agent session: prompt, write agent.go, record the transcript, stop.
	sess := env.NewSession()
	if err := env.SimulateUserPromptSubmit(sess.ID); err != nil {
		t.Fatalf("SimulateUserPromptSubmit failed: %v", err)
	}

	agentSrc := "package main\n\n" + strings.Repeat("// agent line\n", 18)
	env.WriteFile("agent.go", agentSrc)
	sess.CreateTranscript(
		"Create agent.go with agent-authored lines",
		[]FileChange{{Path: "agent.go", Content: agentSrc}},
	)
	if err := env.SimulateStop(sess.ID, sess.TranscriptPath); err != nil {
		t.Fatalf("SimulateStop failed: %v", err)
	}

	// Intermediate commit of an unrelated file, made without the hooks.
	unrelatedSrc := "package main\n\n" + strings.Repeat("// unrelated line\n", 48)
	env.WriteFile("unrelated.go", unrelatedSrc)
	env.GitAdd("unrelated.go")
	env.GitCommit("Add unrelated.go without hooks")

	// The commit under test: agent.go only, with the shadow hooks active.
	env.GitCommitWithShadowHooks("Add agent work", "agent.go")

	repo, err := git.PlainOpen(env.RepoDir)
	if err != nil {
		t.Fatalf("failed to open repo: %v", err)
	}

	headCommit, err := repo.CommitObject(plumbing.NewHash(env.GetHeadHash()))
	if err != nil {
		t.Fatalf("failed to get commit object: %v", err)
	}

	checkpointID, ok := trailers.ParseCheckpoint(headCommit.Message)
	if !ok {
		t.Fatal("Commit should have Entire-Checkpoint trailer")
	}

	attr := getAttributionFromMetadata(t, repo, checkpointID)

	// agent.go: 2-line header + 18 agent lines = 20 lines.
	// unrelated.go went in with the earlier commit, so none of its lines may
	// show up as human additions or in the committed total here.
	if attr.AgentLines != 20 {
		t.Errorf("AgentLines = %d, want 20", attr.AgentLines)
	}
	if attr.HumanAdded != 0 {
		t.Errorf("HumanAdded = %d, want 0 (unrelated.go committed separately)", attr.HumanAdded)
	}
	if attr.TotalCommitted != 20 {
		t.Errorf("TotalCommitted = %d, want 20 (only agent.go in this commit)", attr.TotalCommitted)
	}
	// Tolerance band around 100% rather than an exact float comparison.
	if pct := attr.AgentPercentage; pct < 99.9 || pct > 100.1 {
		t.Errorf("AgentPercentage = %.1f%%, want 100.0%%", pct)
	}
}

// getAttributionFromMetadata reads attribution from a checkpoint on entire/checkpoints/v1 branch.
// InitialAttribution is stored in session-level metadata (0/metadata.json).
func getAttributionFromMetadata(t *testing.T, repo *git.Repository, checkpointID id.CheckpointID) *checkpoint.InitialAttribution {
Expand Down
35 changes: 29 additions & 6 deletions cmd/entire/cli/strategy/manual_commit_attribution.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,9 @@ func countLinesStr(content string) int {
// 4. Estimate user self-modifications vs agent modifications using per-file tracking
// 5. Compute percentages
//
// attributionBaseCommit and headCommitHash are optional commit hashes for fast non-agent
// file detection via git diff-tree. When empty, falls back to go-git tree walk.
// attributionBaseCommit, headCommitHash, and parentCommitHash are optional commit hashes
// for fast non-agent file detection via git diff-tree. When empty, falls back to go-git
// tree walk using the provided trees.
//
// Note: Binary files (detected by null bytes) are silently excluded from attribution
// calculations since line-based diffing only applies to text files.
Expand All @@ -191,11 +192,13 @@ func CalculateAttributionWithAccumulated(
baseTree *object.Tree,
shadowTree *object.Tree,
headTree *object.Tree,
parentTree *object.Tree,
filesTouched []string,
promptAttributions []PromptAttribution,
repoDir string,
attributionBaseCommit string,
headCommitHash string,
parentCommitHash string,
) *checkpoint.InitialAttribution {
if len(filesTouched) == 0 {
return nil
Expand Down Expand Up @@ -242,9 +245,29 @@ func CalculateAttributionWithAccumulated(
}
}

// Calculate total user edits to non-agent files (files not in filesTouched)
// These files are not in the shadow tree, so base→head captures ALL their user edits
allChangedFiles, err := getAllChangedFiles(ctx, baseTree, headTree, repoDir, attributionBaseCommit, headCommitHash)
// Calculate total user edits to non-agent files (files not in filesTouched).
// Scope this to the attributed commit only by diffing parent→head. If parent
// context is unavailable, fall back to the session base behavior.
// IMPORTANT: nonAgentBaseTree and nonAgentBaseCommit must reference the same
// commit — getAllChangedFiles has a fast path (CLI diff-tree using hashes) and
// a slow path (go-git tree walk using trees). If they diverge, attribution
// produces inconsistent results depending on which path fires.
nonAgentBaseTree := baseTree
nonAgentBaseCommit := attributionBaseCommit
if parentTree != nil {
nonAgentBaseTree = parentTree
nonAgentBaseCommit = parentCommitHash
} else if parentCommitHash != "" {
// parentCommitHash is set but parentTree is nil — parent object resolution
// failed (e.g. shallow clone, pack corruption). Fall back to session base
// for both tree and hash to keep them in sync.
logging.Warn(logging.WithComponent(ctx, "attribution"),
"attribution: parent tree unavailable despite parent hash being set; non-agent file counts may be inflated",
slog.String("parent_commit_hash", parentCommitHash),
)
}

allChangedFiles, err := getAllChangedFiles(ctx, nonAgentBaseTree, headTree, repoDir, nonAgentBaseCommit, headCommitHash)
if err != nil {
logging.Warn(logging.WithComponent(ctx, "attribution"),
"attribution: failed to enumerate changed files",
Expand All @@ -258,7 +281,7 @@ func CalculateAttributionWithAccumulated(
continue // Skip agent-touched files
}

baseContent := getFileContent(baseTree, filePath)
baseContent := getFileContent(nonAgentBaseTree, filePath)
headContent := getFileContent(headTree, filePath)
_, userAdded, _ := diffLines(baseContent, headContent)
allUserEditsToNonAgentFiles += userAdded
Expand Down
Loading
Loading