diff --git a/cmd/entire/cli/checkpoint/checkpoint.go b/cmd/entire/cli/checkpoint/checkpoint.go index 40133fbcf..f3300a366 100644 --- a/cmd/entire/cli/checkpoint/checkpoint.go +++ b/cmd/entire/cli/checkpoint/checkpoint.go @@ -419,8 +419,8 @@ func (m CommittedMetadata) GetTranscriptStart() int { // Used in CheckpointSummary.Sessions to map session IDs to their file locations. type SessionFilePaths struct { Metadata string `json:"metadata"` - Transcript string `json:"transcript"` - ContentHash string `json:"content_hash"` + Transcript string `json:"transcript,omitempty"` + ContentHash string `json:"content_hash,omitempty"` Prompt string `json:"prompt"` } diff --git a/cmd/entire/cli/checkpoint/v2_committed.go b/cmd/entire/cli/checkpoint/v2_committed.go new file mode 100644 index 000000000..f728ee511 --- /dev/null +++ b/cmd/entire/cli/checkpoint/v2_committed.go @@ -0,0 +1,480 @@ +package checkpoint + +import ( + "context" + "crypto/sha256" + "errors" + "fmt" + "log/slog" + "os" + "strings" + "time" + + "github.com/entireio/cli/cmd/entire/cli/agent" + "github.com/entireio/cli/cmd/entire/cli/agent/types" + "github.com/entireio/cli/cmd/entire/cli/jsonutil" + "github.com/entireio/cli/cmd/entire/cli/logging" + "github.com/entireio/cli/cmd/entire/cli/paths" + "github.com/entireio/cli/cmd/entire/cli/validation" + "github.com/entireio/cli/cmd/entire/cli/versioninfo" + "github.com/entireio/cli/redact" + + "github.com/go-git/go-git/v6/plumbing" + "github.com/go-git/go-git/v6/plumbing/filemode" + "github.com/go-git/go-git/v6/plumbing/object" +) + +// WriteCommitted writes a committed checkpoint to both v2 refs: +// - /main: metadata and prompts (no raw transcript or content hash) +// - /full/current: raw transcript + content hash (replaces previous content) +// +// This is the public entry point for v2 dual-writes. The session index is +// determined from the /main ref and passed to the /full/current write to +// keep both refs consistent. 
+func (s *V2GitStore) WriteCommitted(ctx context.Context, opts WriteCommittedOptions) error {
+	// /main is written first because that write determines the session slot.
+	slot, mainErr := s.writeCommittedMain(ctx, opts)
+	if mainErr != nil {
+		return fmt.Errorf("v2 /main write failed: %w", mainErr)
+	}
+
+	// Hand the same slot to /full/current so both refs number sessions alike.
+	fullErr := s.writeCommittedFullTranscript(ctx, opts, slot)
+	if fullErr != nil {
+		return fmt.Errorf("v2 /full/current write failed: %w", fullErr)
+	}
+	return nil
+}
+
+// UpdateCommitted finalizes an existing v2 checkpoint by replacing its prompts
+// and, when a transcript is supplied, its raw transcript.
+// It is called at stop time, once the complete session transcript is available.
+//
+//   - /main: prompts are replaced (the transcript is not stored on this ref).
+//   - /full/current: the raw transcript is replaced, but only when
+//     opts.Transcript is non-empty.
+//
+// An empty checkpoint ID is rejected before anything is written. Errors from
+// either ref are wrapped with %w, so a checkpoint that does not exist on /main
+// can be detected with errors.Is(err, ErrCheckpointNotFound).
+func (s *V2GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOptions) error {
+	if opts.CheckpointID.IsEmpty() {
+		return errors.New("invalid update options: checkpoint ID is required")
+	}
+
+	slot, mainErr := s.updateCommittedMain(ctx, opts)
+	if mainErr != nil {
+		return fmt.Errorf("v2 /main update failed: %w", mainErr)
+	}
+
+	// Without a transcript there is nothing to replace on /full/current.
+	if len(opts.Transcript) == 0 {
+		return nil
+	}
+
+	fullErr := s.updateCommittedFullTranscript(ctx, opts, slot)
+	if fullErr != nil {
+		return fmt.Errorf("v2 /full/current update failed: %w", fullErr)
+	}
+	return nil
+}
+
+// updateCommittedMain updates prompts on the /main ref for an existing checkpoint.
+// Returns the session index for coordination with /full/current.
+func (s *V2GitStore) updateCommittedMain(ctx context.Context, opts UpdateCommittedOptions) (int, error) {
+	refName := plumbing.ReferenceName(paths.V2MainRefName)
+	parentHash, rootTreeHash, err := s.getRefState(refName)
+	if err != nil {
+		// Only a missing ref means "this checkpoint was never written". Any other
+		// failure (for example an unreadable commit object) is a real repository
+		// error and must not be reported as not-found.
+		if errors.Is(err, plumbing.ErrReferenceNotFound) {
+			return 0, ErrCheckpointNotFound
+		}
+		return 0, fmt.Errorf("failed to read /main ref state: %w", err)
+	}
+
+	basePath := opts.CheckpointID.Path() + "/"
+	checkpointPath := opts.CheckpointID.Path()
+
+	entries, err := s.gs.flattenCheckpointEntries(rootTreeHash, checkpointPath)
+	if err != nil {
+		return 0, err
+	}
+
+	// The root summary lists the sessions; without it the checkpoint does not exist.
+	rootMetadataPath := basePath + paths.MetadataFileName
+	entry, exists := entries[rootMetadataPath]
+	if !exists {
+		return 0, ErrCheckpointNotFound
+	}
+
+	summary, err := readJSONFromBlob[CheckpointSummary](s.repo, entry.Hash)
+	if err != nil {
+		return 0, fmt.Errorf("failed to read checkpoint summary: %w", err)
+	}
+	if len(summary.Sessions) == 0 {
+		return 0, ErrCheckpointNotFound
+	}
+
+	// Find session index by ID, fall back to latest
+	sessionIndex := s.gs.findSessionIndex(ctx, basePath, summary, entries, opts.SessionID)
+	if sessionIndex >= len(summary.Sessions) {
+		// findSessionIndex returns next-available when not found; fall back to latest
+		sessionIndex = len(summary.Sessions) - 1
+		logging.Debug(ctx, "v2 UpdateCommitted: session ID not found, falling back to latest",
+			slog.String("session_id", opts.SessionID),
+			slog.String("checkpoint_id", string(opts.CheckpointID)),
+			slog.Int("fallback_index", sessionIndex),
+		)
+	}
+
+	sessionPath := fmt.Sprintf("%s%d/", basePath, sessionIndex)
+
+	// Replace prompts (redacted first); other entries of the session are left as they are.
+	if len(opts.Prompts) > 0 {
+		promptContent := redact.String(strings.Join(opts.Prompts, "\n\n---\n\n"))
+		blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent))
+		if err != nil {
+			return 0, fmt.Errorf("failed to create prompt blob: %w", err)
+		}
+		entries[sessionPath+paths.PromptFileName] = object.TreeEntry{
+			Name: sessionPath + paths.PromptFileName,
+			Mode: filemode.Regular,
+			Hash: blobHash,
+		}
+	}
+
+	newTreeHash, err := s.gs.spliceCheckpointSubtree(rootTreeHash, opts.CheckpointID, basePath, entries)
+	if err != nil {
+		return 0, err
+	}
+
+	// UpdateCommittedOptions carries no author, so the repo's configured author is used.
+	authorName, authorEmail := GetGitAuthorFromRepo(s.repo)
+	commitMsg := fmt.Sprintf("Finalize checkpoint: %s\n", opts.CheckpointID)
+	if err := s.updateRef(refName, newTreeHash, parentHash, commitMsg, authorName, authorEmail); err != nil {
+		return 0, err
+	}
+
+	return sessionIndex, nil
+}
+
+// updateCommittedFullTranscript replaces the transcript for a specific checkpoint
+// on /full/current while preserving other checkpoints' transcripts in the tree.
+//
+// sessionIndex selects the session subdirectory under the checkpoint path.
+// Everything already stored under that subdirectory is removed before the
+// redacted transcript chunks and the content hash are written, so chunks from a
+// previous, longer transcript cannot survive. The ref is created if missing.
+func (s *V2GitStore) updateCommittedFullTranscript(ctx context.Context, opts UpdateCommittedOptions, sessionIndex int) error {
+	refName := plumbing.ReferenceName(paths.V2FullCurrentRefName)
+	if err := s.ensureRef(refName); err != nil {
+		return fmt.Errorf("failed to ensure /full/current ref: %w", err)
+	}
+
+	parentHash, rootTreeHash, err := s.getRefState(refName)
+	if err != nil {
+		return err
+	}
+
+	basePath := opts.CheckpointID.Path() + "/"
+	checkpointPath := opts.CheckpointID.Path()
+	sessionPath := fmt.Sprintf("%s%d/", basePath, sessionIndex)
+
+	// Read existing entries and replace transcript for this checkpoint only
+	entries, err := s.gs.flattenCheckpointEntries(rootTreeHash, checkpointPath)
+	if err != nil {
+		return err
+	}
+
+	// Clear existing transcript entries at this session path before writing new ones
+	for key := range entries {
+		if strings.HasPrefix(key, sessionPath) {
+			delete(entries, key)
+		}
+	}
+
+	redactedTranscript, err := s.writeTranscriptBlobs(ctx, opts.Transcript, opts.Agent, sessionPath, entries)
+	if err != nil {
+		return err
+	}
+
+	// The hash is computed over the redacted bytes, i.e. over what is actually stored.
+	if err := s.writeContentHash(redactedTranscript, sessionPath, entries); err != nil {
+		return err
+	}
+
+	// Splice into existing root tree (preserves other checkpoints' transcripts)
+	newTreeHash, err := s.gs.spliceCheckpointSubtree(rootTreeHash, opts.CheckpointID, basePath, entries)
+	if err != nil {
+		return err
+	}
+
+	authorName, authorEmail := GetGitAuthorFromRepo(s.repo)
+	commitMsg := fmt.Sprintf("Finalize checkpoint: %s\n", opts.CheckpointID)
+	return s.updateRef(refName, newTreeHash, parentHash, commitMsg, authorName, authorEmail)
+}
+
+// writeCommittedMain writes metadata entries to the /main ref.
+// This includes session metadata and prompts — but NOT the raw transcript
+// (full.jsonl) or content hash (content_hash.txt), which go to /full/current.
+// Returns the session index used, so the caller can pass it to writeCommittedFullTranscript.
+//
+// opts is validated first; the /main ref is created when it does not exist yet.
+// The commit is authored with opts.AuthorName / opts.AuthorEmail.
+func (s *V2GitStore) writeCommittedMain(ctx context.Context, opts WriteCommittedOptions) (int, error) {
+	if err := validateWriteOpts(opts); err != nil {
+		return 0, err
+	}
+
+	refName := plumbing.ReferenceName(paths.V2MainRefName)
+	if err := s.ensureRef(refName); err != nil {
+		return 0, fmt.Errorf("failed to ensure /main ref: %w", err)
+	}
+
+	parentHash, rootTreeHash, err := s.getRefState(refName)
+	if err != nil {
+		return 0, err
+	}
+
+	basePath := opts.CheckpointID.Path() + "/"
+	checkpointPath := opts.CheckpointID.Path()
+
+	// Read existing entries at this checkpoint's shard path
+	entries, err := s.gs.flattenCheckpointEntries(rootTreeHash, checkpointPath)
+	if err != nil {
+		return 0, err
+	}
+
+	// Build main session entries (metadata, prompts — no transcript or content hash)
+	sessionIndex, err := s.writeMainCheckpointEntries(ctx, opts, basePath, entries)
+	if err != nil {
+		return 0, err
+	}
+
+	// Splice entries into root tree
+	newTreeHash, err := s.gs.spliceCheckpointSubtree(rootTreeHash, opts.CheckpointID, basePath, entries)
+	if err != nil {
+		return 0, err
+	}
+
+	commitMsg := fmt.Sprintf("Checkpoint: %s\n", opts.CheckpointID)
+	if err := s.updateRef(refName, newTreeHash, parentHash, commitMsg, opts.AuthorName, opts.AuthorEmail); err != nil {
+		return 0, err
+	}
+	return sessionIndex, nil
+}
+
+// writeMainCheckpointEntries orchestrates writing session data to the /main ref.
+// It mirrors GitStore.writeStandardCheckpointEntries but excludes raw transcript blobs.
+// Returns the session index used, for coordination with writeCommittedFullTranscript.
+//
+// entries is mutated in place: the session's files and the root summary are
+// added to (or replaced in) the map. An existing root summary that cannot be
+// parsed is treated as absent (best-effort), but the failure is logged so that
+// losing the previous session list does not go unnoticed.
+func (s *V2GitStore) writeMainCheckpointEntries(ctx context.Context, opts WriteCommittedOptions, basePath string, entries map[string]object.TreeEntry) (int, error) {
+	// Read existing summary to get current session count
+	var existingSummary *CheckpointSummary
+	metadataPath := basePath + paths.MetadataFileName
+	if entry, exists := entries[metadataPath]; exists {
+		existing, err := readJSONFromBlob[CheckpointSummary](s.repo, entry.Hash)
+		if err != nil {
+			logging.Debug(ctx, "v2 WriteCommitted: existing checkpoint summary unreadable, starting a new one",
+				slog.String("checkpoint_id", string(opts.CheckpointID)),
+				slog.String("error", err.Error()),
+			)
+		} else {
+			existingSummary = existing
+		}
+	}
+
+	// Determine session index
+	sessionIndex := s.gs.findSessionIndex(ctx, basePath, existingSummary, entries, opts.SessionID)
+
+	// Write session files (metadata and prompts — no transcript or content hash)
+	sessionPath := fmt.Sprintf("%s%d/", basePath, sessionIndex)
+	sessionFilePaths, err := s.writeMainSessionToSubdirectory(opts, sessionPath, entries)
+	if err != nil {
+		return 0, err
+	}
+
+	// Build the sessions array. It is always sized to hold sessionIndex, so the
+	// assignment below cannot go out of range whatever index the lookup returned.
+	sessionCount := sessionIndex + 1
+	if existingSummary != nil {
+		sessionCount = max(sessionCount, len(existingSummary.Sessions))
+	}
+	sessions := make([]SessionFilePaths, sessionCount)
+	if existingSummary != nil {
+		copy(sessions, existingSummary.Sessions)
+	}
+	sessions[sessionIndex] = sessionFilePaths
+
+	// Write root CheckpointSummary
+	if err := s.gs.writeCheckpointSummary(opts, basePath, entries, sessions); err != nil {
+		return 0, err
+	}
+	return sessionIndex, nil
+}
+
+// writeMainSessionToSubdirectory writes a single session's prompts and metadata
+// to a session subdirectory (0/, 1/, 2/, … indexed by session order within the
+// checkpoint). Unlike the v1 equivalent, this does NOT write the raw transcript
+// (full.jsonl) or the content hash — those go to /full/current.
+func (s *V2GitStore) writeMainSessionToSubdirectory(opts WriteCommittedOptions, sessionPath string, entries map[string]object.TreeEntry) (SessionFilePaths, error) {
+	var result SessionFilePaths
+
+	// addEntry registers a regular-file blob under the given tree path.
+	addEntry := func(treePath string, blob plumbing.Hash) {
+		entries[treePath] = object.TreeEntry{
+			Name: treePath,
+			Mode: filemode.Regular,
+			Hash: blob,
+		}
+	}
+
+	// Start from a clean slot: drop everything previously stored for this session.
+	for existing := range entries {
+		if !strings.HasPrefix(existing, sessionPath) {
+			continue
+		}
+		delete(entries, existing)
+	}
+
+	// Prompts are joined with a separator and redacted before being stored.
+	if len(opts.Prompts) > 0 {
+		promptPath := sessionPath + paths.PromptFileName
+		joined := strings.Join(opts.Prompts, "\n\n---\n\n")
+		promptBlob, err := CreateBlobFromContent(s.repo, []byte(redact.String(joined)))
+		if err != nil {
+			return result, err
+		}
+		addEntry(promptPath, promptBlob)
+		result.Prompt = "/" + promptPath
+	}
+
+	// Per-session metadata. Transcript and ContentHash stay unset in the returned
+	// paths because neither file is written by this function.
+	sessionMetadata := CommittedMetadata{
+		CheckpointID:                opts.CheckpointID,
+		SessionID:                   opts.SessionID,
+		Strategy:                    opts.Strategy,
+		CreatedAt:                   time.Now().UTC(),
+		Branch:                      opts.Branch,
+		CheckpointsCount:            opts.CheckpointsCount,
+		FilesTouched:                opts.FilesTouched,
+		Agent:                       opts.Agent,
+		Model:                       opts.Model,
+		TurnID:                      opts.TurnID,
+		IsTask:                      opts.IsTask,
+		ToolUseID:                   opts.ToolUseID,
+		TranscriptIdentifierAtStart: opts.TranscriptIdentifierAtStart,
+		CheckpointTranscriptStart:   opts.CheckpointTranscriptStart,
+		TranscriptLinesAtStart:      opts.CheckpointTranscriptStart, // same value as CheckpointTranscriptStart
+		TokenUsage:                  opts.TokenUsage,
+		SessionMetrics:              opts.SessionMetrics,
+		InitialAttribution:          opts.InitialAttribution,
+		Summary:                     redactSummary(opts.Summary),
+		CLIVersion:                  versioninfo.Version,
+	}
+
+	encoded, err := jsonutil.MarshalIndentWithNewline(sessionMetadata, "", " ")
+	if err != nil {
+		return result, fmt.Errorf("failed to marshal session metadata: %w", err)
+	}
+	metadataBlob, err := CreateBlobFromContent(s.repo, encoded)
+	if err != nil {
+		return result, err
+	}
+	metadataPath := sessionPath + paths.MetadataFileName
+	addEntry(metadataPath, metadataBlob)
+	result.Metadata = "/" + metadataPath
+
+	return result, nil
+}
+
+// writeContentHash stores "sha256:<hex digest>" of the given, already-redacted
+// transcript bytes as the content-hash file of the session at sessionPath.
+func (s *V2GitStore) writeContentHash(redactedTranscript []byte, sessionPath string, entries map[string]object.TreeEntry) error {
+	digest := sha256.Sum256(redactedTranscript)
+	hashBlob, err := CreateBlobFromContent(s.repo, []byte(fmt.Sprintf("sha256:%x", digest)))
+	if err != nil {
+		return err
+	}
+
+	hashPath := sessionPath + paths.ContentHashFileName
+	entries[hashPath] = object.TreeEntry{
+		Name: hashPath,
+		Mode: filemode.Regular,
+		Hash: hashBlob,
+	}
+	return nil
+}
+
+// writeCommittedFullTranscript writes the raw transcript to the /full/current ref.
+// Transcripts accumulate across checkpoints — each write splices into the existing
+// tree. When the ref reaches capacity, generation rotation (future work) archives
+// the current ref and starts a fresh one.
+//
+// sessionIndex is the session slot (0-based), determined by the caller to stay
+// consistent with the /main ref's session numbering.
+// This is a no-op if opts.Transcript is empty and no transcript can be loaded
+// from opts.TranscriptPath (unset or unreadable).
+func (s *V2GitStore) writeCommittedFullTranscript(ctx context.Context, opts WriteCommittedOptions, sessionIndex int) error {
+	// Prefer the in-memory transcript; fall back to the file on disk.
+	transcript := opts.Transcript
+	if len(transcript) == 0 && opts.TranscriptPath != "" {
+		data, readErr := os.ReadFile(opts.TranscriptPath)
+		if readErr != nil {
+			// Best-effort: an unreadable transcript file skips the /full/current
+			// write instead of failing the checkpoint, but leaves a trace in the log.
+			logging.Debug(ctx, "v2 WriteCommitted: transcript file unreadable, skipping /full/current write",
+				slog.String("checkpoint_id", string(opts.CheckpointID)),
+				slog.String("error", readErr.Error()),
+			)
+		} else {
+			transcript = data
+		}
+	}
+	if len(transcript) == 0 {
+		return nil // No transcript to write
+	}
+
+	if err := validateWriteOpts(opts); err != nil {
+		return err
+	}
+
+	refName := plumbing.ReferenceName(paths.V2FullCurrentRefName)
+	if err := s.ensureRef(refName); err != nil {
+		return fmt.Errorf("failed to ensure /full/current ref: %w", err)
+	}
+
+	parentHash, rootTreeHash, err := s.getRefState(refName)
+	if err != nil {
+		return err
+	}
+
+	basePath := opts.CheckpointID.Path() + "/"
+	checkpointPath := opts.CheckpointID.Path()
+	sessionPath := fmt.Sprintf("%s%d/", basePath, sessionIndex)
+
+	// Read existing entries at this checkpoint's shard path so that other
+	// sessions of the same checkpoint are kept.
+	entries, err := s.gs.flattenCheckpointEntries(rootTreeHash, checkpointPath)
+	if err != nil {
+		return err
+	}
+
+	// Replace, never merge, this session's own files. Without this a transcript
+	// that now splits into fewer chunks would leave stale chunk files behind
+	// that no longer match the content hash. updateCommittedFullTranscript
+	// clears the slot the same way.
+	for key := range entries {
+		if strings.HasPrefix(key, sessionPath) {
+			delete(entries, key)
+		}
+	}
+
+	redactedTranscript, err := s.writeTranscriptBlobs(ctx, transcript, opts.Agent, sessionPath, entries)
+	if err != nil {
+		return err
+	}
+
+	// The hash covers the redacted bytes, i.e. exactly what was stored.
+	if err := s.writeContentHash(redactedTranscript, sessionPath, entries); err != nil {
+		return err
+	}
+
+	// Splice into existing root tree (preserves other checkpoints' transcripts)
+	newTreeHash, err := s.gs.spliceCheckpointSubtree(rootTreeHash, opts.CheckpointID, basePath, entries)
+	if err != nil {
+		return err
+	}
+
+	commitMsg := fmt.Sprintf("Checkpoint: %s\n", opts.CheckpointID)
+	return s.updateRef(refName, newTreeHash, parentHash, commitMsg, opts.AuthorName, opts.AuthorEmail)
+}
+
+// writeTranscriptBlobs writes redacted, chunked transcript blobs to entries.
+// Returns the redacted transcript bytes so the caller can compute the content hash.
+func (s *V2GitStore) writeTranscriptBlobs(ctx context.Context, transcript []byte, agentType types.AgentType, sessionPath string, entries map[string]object.TreeEntry) ([]byte, error) {
+	// Secrets are stripped first so that no chunk ever contains unredacted data.
+	clean, redactErr := redact.JSONLBytes(transcript)
+	if redactErr != nil {
+		return nil, fmt.Errorf("failed to redact transcript: %w", redactErr)
+	}
+
+	parts, chunkErr := agent.ChunkTranscript(ctx, clean, agentType)
+	if chunkErr != nil {
+		return nil, fmt.Errorf("failed to chunk transcript: %w", chunkErr)
+	}
+
+	// One blob per chunk; the file name is derived from the chunk's position.
+	for idx := range parts {
+		blob, err := CreateBlobFromContent(s.repo, parts[idx])
+		if err != nil {
+			return nil, err
+		}
+		target := sessionPath + agent.ChunkFileName(paths.TranscriptFileName, idx)
+		entries[target] = object.TreeEntry{
+			Name: target,
+			Mode: filemode.Regular,
+			Hash: blob,
+		}
+	}
+
+	return clean, nil
+}
+
+// validateWriteOpts checks the identifiers carried by WriteCommittedOptions:
+// the checkpoint ID must be non-empty, and the session, tool-use and agent IDs
+// must each pass their validator. The first failure is returned.
+func validateWriteOpts(opts WriteCommittedOptions) error {
+	if opts.CheckpointID.IsEmpty() {
+		return errors.New("invalid checkpoint options: checkpoint ID is required")
+	}
+
+	// Evaluated lazily and in order, so later validators do not run after a failure.
+	validators := []func() error{
+		func() error { return validation.ValidateSessionID(opts.SessionID) },
+		func() error { return validation.ValidateToolUseID(opts.ToolUseID) },
+		func() error { return validation.ValidateAgentID(opts.AgentID) },
+	}
+	for _, validate := range validators {
+		if err := validate(); err != nil {
+			return fmt.Errorf("invalid checkpoint options: %w", err)
+		}
+	}
+	return nil
+}
diff --git a/cmd/entire/cli/checkpoint/v2_store.go b/cmd/entire/cli/checkpoint/v2_store.go
new file mode 100644
index 000000000..5acccda75
--- /dev/null
+++ b/cmd/entire/cli/checkpoint/v2_store.go
@@ -0,0 +1,88 @@
+package checkpoint
+
+import (
+	"fmt"
+
+	"github.com/go-git/go-git/v6"
+	"github.com/go-git/go-git/v6/plumbing"
+	"github.com/go-git/go-git/v6/plumbing/object"
+)
+
+// V2GitStore provides checkpoint storage operations for the v2 ref layout.
+// It writes to two custom refs under refs/entire/:
+//   - /main: permanent metadata + compact transcripts
+//   - /full/current: active generation of raw transcripts
+//
+// The type is deliberately kept apart from GitStore (v1) so the two layouts
+// stay isolated and v1 can be removed later. It nevertheless holds a GitStore
+// to reuse the ref-agnostic entry-building helpers (tree surgery, session
+// indexing, summary aggregation).
+type V2GitStore struct {
+	repo *git.Repository
+	gs   *GitStore // shared entry-building helpers (same package)
+}
+
+// NewV2GitStore returns a v2 checkpoint store for repo. The embedded v1 helper
+// store is backed by the same repository.
+func NewV2GitStore(repo *git.Repository) *V2GitStore {
+	store := &V2GitStore{repo: repo}
+	store.gs = &GitStore{repo: repo}
+	return store
+}
+
+// ensureRef makes sure refName exists. When the lookup fails, an orphan commit
+// (no parent) with an empty tree is created and the ref is pointed at it.
+//
+// NOTE(review): any lookup error, not only "reference not found", takes the
+// create path — confirm that is intended.
+func (s *V2GitStore) ensureRef(refName plumbing.ReferenceName) error {
+	if _, lookupErr := s.repo.Reference(refName, true); lookupErr == nil {
+		return nil // Already exists
+	}
+
+	emptyTree, treeErr := BuildTreeFromEntries(s.repo, map[string]object.TreeEntry{})
+	if treeErr != nil {
+		return fmt.Errorf("failed to build empty tree: %w", treeErr)
+	}
+
+	// plumbing.ZeroHash as parent makes this the root commit of the ref.
+	name, email := GetGitAuthorFromRepo(s.repo)
+	rootCommit, commitErr := CreateCommit(s.repo, emptyTree, plumbing.ZeroHash, "Initialize v2 ref", name, email)
+	if commitErr != nil {
+		return fmt.Errorf("failed to create initial commit: %w", commitErr)
+	}
+
+	setErr := s.repo.Storer.SetReference(plumbing.NewHashReference(refName, rootCommit))
+	if setErr != nil {
+		return fmt.Errorf("failed to set ref %s: %w", refName, setErr)
+	}
+	return nil
+}
+
+// getRefState returns the parent commit hash and root tree hash for a ref.
+func (s *V2GitStore) getRefState(refName plumbing.ReferenceName) (parentHash, treeHash plumbing.Hash, err error) {
+	head, lookupErr := s.repo.Reference(refName, true)
+	if lookupErr != nil {
+		return plumbing.ZeroHash, plumbing.ZeroHash, fmt.Errorf("ref %s not found: %w", refName, lookupErr)
+	}
+
+	// The commit the ref points at becomes the parent of the next commit on it.
+	tip := head.Hash()
+	tipCommit, commitErr := s.repo.CommitObject(tip)
+	if commitErr != nil {
+		return plumbing.ZeroHash, plumbing.ZeroHash, fmt.Errorf("failed to get commit for ref %s: %w", refName, commitErr)
+	}
+
+	return tip, tipCommit.TreeHash, nil
+}
+
+// updateRef commits treeHash on top of parentHash with the given message and
+// author, then points refName at the new commit.
+func (s *V2GitStore) updateRef(refName plumbing.ReferenceName, treeHash, parentHash plumbing.Hash, message, authorName, authorEmail string) error {
+	newCommit, commitErr := CreateCommit(s.repo, treeHash, parentHash, message, authorName, authorEmail)
+	if commitErr != nil {
+		return fmt.Errorf("failed to create commit: %w", commitErr)
+	}
+
+	setErr := s.repo.Storer.SetReference(plumbing.NewHashReference(refName, newCommit))
+	if setErr != nil {
+		return fmt.Errorf("failed to update ref %s: %w", refName, setErr)
+	}
+	return nil
+}
diff --git a/cmd/entire/cli/checkpoint/v2_store_test.go b/cmd/entire/cli/checkpoint/v2_store_test.go
new file mode 100644
index 000000000..cd3b97609
--- /dev/null
+++ b/cmd/entire/cli/checkpoint/v2_store_test.go
@@ -0,0 +1,717 @@
+package checkpoint
+
+import (
+	"context"
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/entireio/cli/cmd/entire/cli/agent"
+	"github.com/entireio/cli/cmd/entire/cli/checkpoint/id"
+	"github.com/entireio/cli/cmd/entire/cli/paths"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/go-git/go-git/v6"
+	"github.com/go-git/go-git/v6/plumbing"
+	"github.com/go-git/go-git/v6/plumbing/object"
+)
+
+// initTestRepo creates a bare-minimum git repo with one commit (needed for HEAD).
+func initTestRepo(t *testing.T) *git.Repository {
+	t.Helper()
+	// The repository lives in a per-test temp dir, so parallel tests do not share state.
+	dir := t.TempDir()
+
+	// Second argument false: a regular repository with a worktree.
+	repo, err := git.PlainInit(dir, false)
+	require.NoError(t, err)
+
+	wt, err := repo.Worktree()
+	require.NoError(t, err)
+
+	// Commit a single file so the repository has an initial commit.
+	require.NoError(t, os.WriteFile(filepath.Join(dir, "README.md"), []byte("init"), 0o644))
+	_, err = wt.Add("README.md")
+	require.NoError(t, err)
+	_, err = wt.Commit("initial", &git.CommitOptions{
+		Author: &object.Signature{Name: "Test", Email: "test@test.com"},
+	})
+	require.NoError(t, err)
+
+	return repo
+}
+
+// TestNewV2GitStore checks that the constructor returns a store bound to the
+// repository it was given.
+func TestNewV2GitStore(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+	require.NotNil(t, store)
+	require.Equal(t, repo, store.repo)
+}
+
+// TestV2GitStore_EnsureRef_CreatesNewRef checks that ensureRef creates a missing
+// ref and that the commit it points at has an empty tree.
+func TestV2GitStore_EnsureRef_CreatesNewRef(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+
+	refName := plumbing.ReferenceName(paths.V2MainRefName)
+
+	// Ref should not exist yet
+	_, err := repo.Reference(refName, true)
+	require.Error(t, err)
+
+	// Ensure creates it
+	require.NoError(t, store.ensureRef(refName))
+
+	// Ref should now exist and point to a valid commit with an empty tree
+	ref, err := repo.Reference(refName, true)
+	require.NoError(t, err)
+
+	commit, err := repo.CommitObject(ref.Hash())
+	require.NoError(t, err)
+
+	tree, err := commit.Tree()
+	require.NoError(t, err)
+	require.Empty(t, tree.Entries, "initial tree should be empty")
+}
+
+// TestV2GitStore_EnsureRef_Idempotent checks that calling ensureRef on an
+// existing ref leaves it pointing at the same commit.
+func TestV2GitStore_EnsureRef_Idempotent(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+
+	refName := plumbing.ReferenceName(paths.V2MainRefName)
+
+	require.NoError(t, store.ensureRef(refName))
+	ref1, err := repo.Reference(refName, true)
+	require.NoError(t, err)
+
+	// Second call should be a no-op — same commit hash
+	require.NoError(t, store.ensureRef(refName))
+	ref2, err := repo.Reference(refName, true)
+	require.NoError(t, err)
+	require.Equal(t, ref1.Hash(), ref2.Hash())
+}
+
+// TestV2GitStore_EnsureRef_DifferentRefs checks that the /main and /full/current
+// refs can both be created in the same repository.
+func TestV2GitStore_EnsureRef_DifferentRefs(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+
+	mainRef := plumbing.ReferenceName(paths.V2MainRefName)
+	fullRef := plumbing.ReferenceName(paths.V2FullCurrentRefName)
+
+	require.NoError(t, store.ensureRef(mainRef))
+	require.NoError(t, store.ensureRef(fullRef))
+
+	// Both should exist independently
+	_, err := repo.Reference(mainRef, true)
+	require.NoError(t, err)
+	_, err = repo.Reference(fullRef, true)
+	require.NoError(t, err)
+}
+
+// TestV2GitStore_GetRefState_ReturnsParentAndTree checks that getRefState
+// succeeds on a freshly created ref and reports a non-zero parent commit hash.
+func TestV2GitStore_GetRefState_ReturnsParentAndTree(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+
+	refName := plumbing.ReferenceName(paths.V2MainRefName)
+	require.NoError(t, store.ensureRef(refName))
+
+	parentHash, treeHash, err := store.getRefState(refName)
+	require.NoError(t, err)
+	require.NotEqual(t, plumbing.ZeroHash, parentHash, "parent hash should be non-zero")
+	// The tree hash value is deliberately not asserted; this test only requires
+	// that getRefState returns without error.
+	_ = treeHash
+}
+
+// TestV2GitStore_GetRefState_ErrorsOnMissingRef checks that getRefState fails
+// for a ref that was never created.
+func TestV2GitStore_GetRefState_ErrorsOnMissingRef(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+
+	refName := plumbing.ReferenceName("refs/entire/nonexistent")
+	_, _, err := store.getRefState(refName)
+	require.Error(t, err)
+}
+
+// TestV2GitStore_UpdateRef_CreatesCommit checks that updateRef advances the ref
+// to a new commit carrying the given tree and message, with the previous ref
+// target as its only parent.
+func TestV2GitStore_UpdateRef_CreatesCommit(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+
+	refName := plumbing.ReferenceName(paths.V2MainRefName)
+	require.NoError(t, store.ensureRef(refName))
+
+	parentHash, treeHash, err := store.getRefState(refName)
+	require.NoError(t, err)
+
+	// Build a tree with one file
+	blobHash, err := CreateBlobFromContent(repo, []byte("hello"))
+	require.NoError(t, err)
+
+	entries := map[string]object.TreeEntry{
+		"test.txt": {Name: "test.txt", Mode: 0o100644, Hash: blobHash},
+	}
+	newTreeHash, err := BuildTreeFromEntries(repo, entries)
+	require.NoError(t, err)
+	// Guards the assertions below: the new tree must differ from the initial one.
+	require.NotEqual(t, treeHash, newTreeHash)
+
+	// Update the ref
+	require.NoError(t, store.updateRef(refName, newTreeHash, parentHash, "test commit", "Test", "test@test.com"))
+
+	// Verify the ref now points to a commit with our tree
+	ref, err := repo.Reference(refName, true)
+	require.NoError(t, err)
+	require.NotEqual(t, parentHash, ref.Hash(), "ref should point to new commit")
+
+	commit, err := repo.CommitObject(ref.Hash())
+	require.NoError(t, err)
+	require.Equal(t, newTreeHash, commit.TreeHash)
+	require.Equal(t, "test commit", commit.Message)
+	require.Len(t, commit.ParentHashes, 1)
+	require.Equal(t, parentHash, commit.ParentHashes[0])
+}
+
+// v2MainTree returns the root tree from the /main ref for test assertions.
+// It fails the test immediately if the ref, its commit, or the tree cannot be read.
+func v2MainTree(t *testing.T, repo *git.Repository) *object.Tree {
+	t.Helper()
+	ref, err := repo.Reference(plumbing.ReferenceName(paths.V2MainRefName), true)
+	require.NoError(t, err)
+	commit, err := repo.CommitObject(ref.Hash())
+	require.NoError(t, err)
+	tree, err := commit.Tree()
+	require.NoError(t, err)
+	return tree
+}
+
+// v2ReadFile reads a file from a git tree by path.
+func v2ReadFile(t *testing.T, tree *object.Tree, path string) string {
+	t.Helper()
+	// A missing file fails the test here, so callers can use this as an existence check.
+	file, err := tree.File(path)
+	require.NoError(t, err, "expected file at %s", path)
+	content, err := file.Contents()
+	require.NoError(t, err)
+	return content
+}
+
+// TestV2GitStore_WriteCommittedMain_WritesMetadata checks that a /main write
+// produces the root checkpoint summary and the metadata file of session 0.
+func TestV2GitStore_WriteCommittedMain_WritesMetadata(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+	ctx := context.Background()
+
+	cpID := id.MustCheckpointID("a1b2c3d4e5f6")
+	_, err := store.writeCommittedMain(ctx, WriteCommittedOptions{
+		CheckpointID: cpID,
+		SessionID:    "test-session-001",
+		Strategy:     "manual-commit",
+		Agent:        agent.AgentTypeClaudeCode,
+		Transcript:   []byte(`{"type":"human","message":"hello"}`),
+		Prompts:      []string{"hello"},
+		AuthorName:   "Test",
+		AuthorEmail:  "test@test.com",
+	})
+	require.NoError(t, err)
+
+	tree := v2MainTree(t, repo)
+	cpPath := cpID.Path()
+
+	// Root CheckpointSummary should exist
+	summaryContent := v2ReadFile(t, tree, cpPath+"/"+paths.MetadataFileName)
+	var summary CheckpointSummary
+	require.NoError(t, json.Unmarshal([]byte(summaryContent), &summary))
+	assert.Equal(t, cpID, summary.CheckpointID)
+	assert.Equal(t, "manual-commit", summary.Strategy)
+	assert.Len(t, summary.Sessions, 1)
+
+	// Session metadata should exist in subdirectory 0/
+	sessionMeta := v2ReadFile(t, tree, cpPath+"/0/"+paths.MetadataFileName)
+	var meta CommittedMetadata
+	require.NoError(t, json.Unmarshal([]byte(sessionMeta), &meta))
+	assert.Equal(t, "test-session-001", meta.SessionID)
+	assert.Equal(t, agent.AgentTypeClaudeCode, meta.Agent)
+}
+
+// TestV2GitStore_WriteCommittedMain_WritesPrompts checks that all prompts end
+// up in the session's prompt file and that no content hash file is written to /main.
+func TestV2GitStore_WriteCommittedMain_WritesPrompts(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+	ctx := context.Background()
+
+	cpID := id.MustCheckpointID("b2c3d4e5f6a1")
+	_, err := store.writeCommittedMain(ctx, WriteCommittedOptions{
+		CheckpointID: cpID,
+		SessionID:    "test-session-002",
+		Strategy:     "manual-commit",
+		Transcript:   []byte(`{"line":"one"}`),
+		Prompts:      []string{"do the thing", "also this"},
+		AuthorName:   "Test",
+		AuthorEmail:  "test@test.com",
+	})
+	require.NoError(t, err)
+
+	tree := v2MainTree(t, repo)
+	cpPath := cpID.Path()
+
+	// prompt.txt should contain both prompts joined by separator
+	promptContent := v2ReadFile(t, tree, cpPath+"/0/"+paths.PromptFileName)
+	assert.Contains(t, promptContent, "do the thing")
+	assert.Contains(t, promptContent, "also this")
+
+	// content_hash.txt should NOT be on /main — it lives on /full/current
+	mainSessionTree, err := tree.Tree(cpPath + "/0")
+	require.NoError(t, err)
+	_, err = mainSessionTree.File(paths.ContentHashFileName)
+	assert.Error(t, err, "content_hash.txt should not be on /main ref")
+}
+
+// TestV2GitStore_WriteCommittedMain_ExcludesTranscript checks that neither the
+// raw transcript file nor any file named with the transcript name plus "."
+// appears in session 0 on /main, even though a transcript was passed in.
+func TestV2GitStore_WriteCommittedMain_ExcludesTranscript(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+	ctx := context.Background()
+
+	cpID := id.MustCheckpointID("c3d4e5f6a1b2")
+	_, err := store.writeCommittedMain(ctx, WriteCommittedOptions{
+		CheckpointID: cpID,
+		SessionID:    "test-session-003",
+		Strategy:     "manual-commit",
+		Transcript:   []byte(`{"line":"one"}` + "\n" + `{"line":"two"}`),
+		Prompts:      []string{"hello"},
+		AuthorName:   "Test",
+		AuthorEmail:  "test@test.com",
+	})
+	require.NoError(t, err)
+
+	tree := v2MainTree(t, repo)
+	cpPath := cpID.Path()
+
+	// full.jsonl should NOT be in the /main tree
+	cpTree, err := tree.Tree(cpPath)
+	require.NoError(t, err)
+
+	sessionTree, err := cpTree.Tree("0")
+	require.NoError(t, err)
+
+	// Inspect every entry of the session directory rather than probing single names.
+	for _, entry := range sessionTree.Entries {
+		assert.NotEqual(t, paths.TranscriptFileName, entry.Name,
+			"raw transcript (full.jsonl) must not be on /main ref")
+		assert.False(t, strings.HasPrefix(entry.Name, paths.TranscriptFileName+"."),
+			"transcript chunks must not be on /main ref")
+	}
+}
+
+// TestV2GitStore_WriteCommittedMain_MultiSession checks that two different
+// sessions written to the same checkpoint get subdirectories 0/ and 1/ and that
+// the root summary lists both and sums their checkpoint counts.
+func TestV2GitStore_WriteCommittedMain_MultiSession(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+	ctx := context.Background()
+
+	cpID := id.MustCheckpointID("e5f6a1b2c3d4")
+
+	// First session
+	_, err := store.writeCommittedMain(ctx, WriteCommittedOptions{
+		CheckpointID:     cpID,
+		SessionID:        "session-A",
+		Strategy:         "manual-commit",
+		Transcript:       []byte(`{"line":"a"}`),
+		CheckpointsCount: 3,
+		AuthorName:       "Test",
+		AuthorEmail:      "test@test.com",
+	})
+	require.NoError(t, err)
+
+	// Second session (different session ID, same checkpoint)
+	_, err = store.writeCommittedMain(ctx, WriteCommittedOptions{
+		CheckpointID:     cpID,
+		SessionID:        "session-B",
+		Strategy:         "manual-commit",
+		Transcript:       []byte(`{"line":"b"}`),
+		CheckpointsCount: 2,
+		AuthorName:       "Test",
+		AuthorEmail:      "test@test.com",
+	})
+	require.NoError(t, err)
+
+	tree := v2MainTree(t, repo)
+	cpPath := cpID.Path()
+
+	// Root summary should list 2 sessions
+	summaryContent := v2ReadFile(t, tree, cpPath+"/"+paths.MetadataFileName)
+	var summary CheckpointSummary
+	require.NoError(t, json.Unmarshal([]byte(summaryContent), &summary))
+	assert.Len(t, summary.Sessions, 2)
+	assert.Equal(t, 5, summary.CheckpointsCount, "aggregated count: 3+2")
+
+	// Both session subdirectories should exist (v2ReadFile fails the test otherwise)
+	_ = v2ReadFile(t, tree, cpPath+"/0/"+paths.MetadataFileName)
+	_ = v2ReadFile(t, tree, cpPath+"/1/"+paths.MetadataFileName)
+}
+
+// v2FullTree returns the root tree from the /full/current ref for test assertions.
+func v2FullTree(t *testing.T, repo *git.Repository) *object.Tree {
+	t.Helper()
+	// Resolve ref -> commit -> tree; any failure aborts the calling test.
+	ref, err := repo.Reference(plumbing.ReferenceName(paths.V2FullCurrentRefName), true)
+	require.NoError(t, err)
+	commit, err := repo.CommitObject(ref.Hash())
+	require.NoError(t, err)
+	tree, err := commit.Tree()
+	require.NoError(t, err)
+	return tree
+}
+
+// TestV2GitStore_WriteCommittedFull_WritesTranscript calls writeCommittedFullTranscript with
+// session index 0 and checks that the transcript file under 0/ on /full/current contains
+// both lines that were passed in.
+func TestV2GitStore_WriteCommittedFull_WritesTranscript(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+	ctx := context.Background()
+
+	cpID := id.MustCheckpointID("f1a2b3c4d5e6")
+	transcript := []byte(`{"type":"human","message":"hello"}` + "\n" + `{"type":"assistant","message":"hi"}`)
+
+	err := store.writeCommittedFullTranscript(ctx, WriteCommittedOptions{
+		CheckpointID: cpID,
+		SessionID:    "test-session-full-001",
+		Strategy:     "manual-commit",
+		Transcript:   transcript,
+		Agent:        agent.AgentTypeClaudeCode,
+		AuthorName:   "Test",
+		AuthorEmail:  "test@test.com",
+	}, 0)
+	require.NoError(t, err)
+
+	tree := v2FullTree(t, repo)
+	cpPath := cpID.Path()
+
+	// Transcript should exist at session subdirectory 0/
+	content := v2ReadFile(t, tree, cpPath+"/0/"+paths.TranscriptFileName)
+	assert.Contains(t, content, `"type":"human"`)
+	assert.Contains(t, content, `"type":"assistant"`)
+}
+
+// TestV2GitStore_WriteCommittedFull_ExcludesMetadata checks that session directory 0/ on
+// /full/current has no metadata or prompt entry, and that a content hash file starting
+// with "sha256:" sits next to the transcript.
+func TestV2GitStore_WriteCommittedFull_ExcludesMetadata(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+	ctx := context.Background()
+
+	cpID := id.MustCheckpointID("a2b3c4d5e6f1")
+	err := store.writeCommittedFullTranscript(ctx, WriteCommittedOptions{
+		CheckpointID: cpID,
+		SessionID:    "test-session-full-002",
+		Strategy:     "manual-commit",
+		Transcript:   []byte(`{"line":"one"}`),
+		Prompts:      []string{"hello"},
+		AuthorName:   "Test",
+		AuthorEmail:  "test@test.com",
+	}, 0)
+	require.NoError(t, err)
+
+	tree := v2FullTree(t, repo)
+	cpPath := cpID.Path()
+
+	cpTree, err := tree.Tree(cpPath)
+	require.NoError(t, err)
+
+	sessionTree, err := cpTree.Tree("0")
+	require.NoError(t, err)
+
+	for _, entry := range sessionTree.Entries {
+		assert.NotEqual(t, paths.MetadataFileName, entry.Name,
+			"metadata.json must not be on /full/current ref")
+		assert.NotEqual(t, paths.PromptFileName, entry.Name,
+			"prompt.txt must not be on /full/current ref")
+	}
+
+	// content_hash.txt SHOULD be on /full/current (co-located with the transcript it hashes)
+	hashContent := v2ReadFile(t, tree, cpPath+"/0/"+paths.ContentHashFileName)
+	assert.True(t, strings.HasPrefix(hashContent, "sha256:"), "content hash should be sha256 prefixed")
+}
+
+// TestV2GitStore_WriteCommittedFull_NoTranscript_Noop calls writeCommittedFullTranscript
+// without a Transcript and accepts either outcome: the /full/current ref is missing, or it
+// exists and its root tree has no entries.
+func TestV2GitStore_WriteCommittedFull_NoTranscript_Noop(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+	ctx := context.Background()
+
+	cpID := id.MustCheckpointID("b3c4d5e6f1a2")
+	err := store.writeCommittedFullTranscript(ctx, WriteCommittedOptions{
+		CheckpointID: cpID,
+		SessionID:    "test-session-full-003",
+		Strategy:     "manual-commit",
+		AuthorName:   "Test",
+		AuthorEmail:  "test@test.com",
+	}, 0)
+	require.NoError(t, err)
+
+	// /full/current ref should either not exist or have an empty tree
+	ref, err := repo.Reference(plumbing.ReferenceName(paths.V2FullCurrentRefName), true)
+	if err == nil {
+		commit, cErr := repo.CommitObject(ref.Hash())
+		require.NoError(t, cErr)
+		tree, tErr := commit.Tree()
+		require.NoError(t, tErr)
+		assert.Empty(t, tree.Entries, "empty transcript should produce no entries")
+	}
+	// If ref doesn't exist at all, that's also acceptable for a no-op
+}
+
+// TestV2GitStore_WriteCommittedFullTranscript_AccumulatesCheckpoints writes two different
+// checkpoint IDs in sequence and checks that both transcripts are readable from the same
+// /full/current tree afterwards.
+func TestV2GitStore_WriteCommittedFullTranscript_AccumulatesCheckpoints(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+	ctx := context.Background()
+
+	cpA := id.MustCheckpointID("c4d5e6f1a2b3")
+	cpB := id.MustCheckpointID("d5e6f1a2b3c4")
+
+	// Write checkpoint A
+	err := store.writeCommittedFullTranscript(ctx, WriteCommittedOptions{
+		CheckpointID: cpA,
+		SessionID:    "session-A",
+		Strategy:     "manual-commit",
+		Transcript:   []byte(`{"from":"A"}`),
+		AuthorName:   "Test",
+		AuthorEmail:  "test@test.com",
+	}, 0)
+	require.NoError(t, err)
+
+	// Write checkpoint B — should accumulate alongside A
+	err = store.writeCommittedFullTranscript(ctx, WriteCommittedOptions{
+		CheckpointID: cpB,
+		SessionID:    "session-B",
+		Strategy:     "manual-commit",
+		Transcript:   []byte(`{"from":"B"}`),
+		AuthorName:   "Test",
+		AuthorEmail:  "test@test.com",
+	}, 0)
+	require.NoError(t, err)
+
+	tree := v2FullTree(t, repo)
+
+	// Both checkpoints should be present
+	contentA := v2ReadFile(t, tree, cpA.Path()+"/0/"+paths.TranscriptFileName)
+	assert.Contains(t, contentA, `"from":"A"`)
+
+	contentB := v2ReadFile(t, tree, cpB.Path()+"/0/"+paths.TranscriptFileName)
+	assert.Contains(t, contentB, `"from":"B"`)
+}
+
+// TestV2GitStore_WriteCommitted_WritesBothRefs drives the public WriteCommitted entry point
+// and checks the split: metadata and prompt files on /main with no transcript or content
+// hash entry in 0/, and transcript plus "sha256:"-prefixed content hash on /full/current.
+func TestV2GitStore_WriteCommitted_WritesBothRefs(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+	ctx := context.Background()
+
+	cpID := id.MustCheckpointID("aa11bb22cc33")
+	err := store.WriteCommitted(ctx, WriteCommittedOptions{
+		CheckpointID: cpID,
+		SessionID:    "test-session-both",
+		Strategy:     "manual-commit",
+		Agent:        agent.AgentTypeClaudeCode,
+		Transcript:   []byte(`{"type":"assistant","message":"hello"}`),
+		Prompts:      []string{"hi there"},
+		AuthorName:   "Test",
+		AuthorEmail:  "test@test.com",
+	})
+	require.NoError(t, err)
+
+	cpPath := cpID.Path()
+
+	// /main ref should have metadata and prompt — no transcript or content hash
+	mainTree := v2MainTree(t, repo)
+	_ = v2ReadFile(t, mainTree, cpPath+"/"+paths.MetadataFileName)
+	_ = v2ReadFile(t, mainTree, cpPath+"/0/"+paths.MetadataFileName)
+	_ = v2ReadFile(t, mainTree, cpPath+"/0/"+paths.PromptFileName)
+
+	mainSessionTree, err := mainTree.Tree(cpPath + "/0")
+	require.NoError(t, err)
+	for _, entry := range mainSessionTree.Entries {
+		assert.NotEqual(t, paths.TranscriptFileName, entry.Name)
+		assert.NotEqual(t, paths.ContentHashFileName, entry.Name)
+	}
+
+	// /full/current ref should have transcript + content hash
+	fullTree := v2FullTree(t, repo)
+	content := v2ReadFile(t, fullTree, cpPath+"/0/"+paths.TranscriptFileName)
+	assert.Contains(t, content, `"type":"assistant"`)
+	hashContent := v2ReadFile(t, fullTree, cpPath+"/0/"+paths.ContentHashFileName)
+	assert.True(t, strings.HasPrefix(hashContent, "sha256:"))
+}
+
+// TestV2GitStore_WriteCommitted_NoTranscript_OnlyWritesMain calls WriteCommitted without a
+// Transcript and checks that /main gets session metadata while looking up the
+// /full/current ref fails.
+func TestV2GitStore_WriteCommitted_NoTranscript_OnlyWritesMain(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+	ctx := context.Background()
+
+	cpID := id.MustCheckpointID("bb22cc33dd44")
+	err := store.WriteCommitted(ctx, WriteCommittedOptions{
+		CheckpointID: cpID,
+		SessionID:    "test-session-notx",
+		Strategy:     "manual-commit",
+		AuthorName:   "Test",
+		AuthorEmail:  "test@test.com",
+	})
+	require.NoError(t, err)
+
+	// /main should have metadata
+	mainTree := v2MainTree(t, repo)
+	_ = v2ReadFile(t, mainTree, cpID.Path()+"/0/"+paths.MetadataFileName)
+
+	// /full/current ref should not exist (no transcript = no-op for full)
+	_, err = repo.Reference(plumbing.ReferenceName(paths.V2FullCurrentRefName), true)
+	assert.Error(t, err, "/full/current should not exist when no transcript is written")
+}
+
+// TestV2GitStore_WriteCommitted_MultiSession_ConsistentIndex writes two sessions to one
+// checkpoint through WriteCommitted and checks that /main lists two sessions and that the
+// second session's transcript is stored under index 1/ on /full/current.
+func TestV2GitStore_WriteCommitted_MultiSession_ConsistentIndex(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+	ctx := context.Background()
+
+	cpID := id.MustCheckpointID("cc33dd44ee55")
+
+	// First session
+	err := store.WriteCommitted(ctx, WriteCommittedOptions{
+		CheckpointID:     cpID,
+		SessionID:        "session-X",
+		Strategy:         "manual-commit",
+		Transcript:       []byte(`{"from":"X"}`),
+		CheckpointsCount: 2,
+		AuthorName:       "Test",
+		AuthorEmail:      "test@test.com",
+	})
+	require.NoError(t, err)
+
+	// Second session — same checkpoint, different session ID
+	err = store.WriteCommitted(ctx, WriteCommittedOptions{
+		CheckpointID:     cpID,
+		SessionID:        "session-Y",
+		Strategy:         "manual-commit",
+		Transcript:       []byte(`{"from":"Y"}`),
+		CheckpointsCount: 3,
+		AuthorName:       "Test",
+		AuthorEmail:      "test@test.com",
+	})
+	require.NoError(t, err)
+
+	cpPath := cpID.Path()
+
+	// /main should have both sessions
+	mainTree := v2MainTree(t, repo)
+	summaryContent := v2ReadFile(t, mainTree, cpPath+"/"+paths.MetadataFileName)
+	var summary CheckpointSummary
+	require.NoError(t, json.Unmarshal([]byte(summaryContent), &summary))
+	assert.Len(t, summary.Sessions, 2)
+
+	// /full/current should have session Y (latest write replaces)
+	// NOTE(review): only 1/ is inspected; nothing here asserts whether session X's
+	// transcript under 0/ survives the second write — confirm which is intended.
+	fullTree := v2FullTree(t, repo)
+	contentY := v2ReadFile(t, fullTree, cpPath+"/1/"+paths.TranscriptFileName)
+	assert.Contains(t, contentY, `"from":"Y"`)
+}
+
+// TestV2GitStore_UpdateCommitted_UpdatesBothRefs writes a checkpoint, then calls
+// UpdateCommitted with new prompts and a new transcript, and checks that /main shows the
+// added prompt and /full/current holds only the replacement transcript.
+func TestV2GitStore_UpdateCommitted_UpdatesBothRefs(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+	ctx := context.Background()
+
+	cpID := id.MustCheckpointID("ff11aa22bb33")
+
+	// Initial write
+	err := store.WriteCommitted(ctx, WriteCommittedOptions{
+		CheckpointID: cpID,
+		SessionID:    "test-session-update",
+		Strategy:     "manual-commit",
+		Agent:        agent.AgentTypeClaudeCode,
+		Transcript:   []byte(`{"type":"assistant","message":"initial"}`),
+		Prompts:      []string{"first prompt"},
+		AuthorName:   "Test",
+		AuthorEmail:  "test@test.com",
+	})
+	require.NoError(t, err)
+
+	// Update with finalized transcript and prompts
+	err = store.UpdateCommitted(ctx, UpdateCommittedOptions{
+		CheckpointID: cpID,
+		SessionID:    "test-session-update",
+		Transcript:   []byte(`{"type":"assistant","message":"finalized"}`),
+		Prompts:      []string{"first prompt", "second prompt"},
+		Agent:        agent.AgentTypeClaudeCode,
+	})
+	require.NoError(t, err)
+
+	cpPath := cpID.Path()
+
+	// /main should have updated prompts
+	mainTree := v2MainTree(t, repo)
+	promptContent := v2ReadFile(t, mainTree, cpPath+"/0/"+paths.PromptFileName)
+	assert.Contains(t, promptContent, "second prompt")
+
+	// /full/current should have finalized transcript
+	fullTree := v2FullTree(t, repo)
+	content := v2ReadFile(t, fullTree, cpPath+"/0/"+paths.TranscriptFileName)
+	assert.Contains(t, content, "finalized")
+	assert.NotContains(t, content, "initial")
+}
+
+// TestV2GitStore_UpdateCommitted_NoTranscript_OnlyUpdatesMain writes a checkpoint with a
+// transcript, then calls UpdateCommitted with prompts only, and checks that /main shows
+// the new prompt while the transcript on /full/current is still the original one.
+func TestV2GitStore_UpdateCommitted_NoTranscript_OnlyUpdatesMain(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+	ctx := context.Background()
+
+	cpID := id.MustCheckpointID("aa33bb44cc55")
+
+	// Initial write with transcript
+	err := store.WriteCommitted(ctx, WriteCommittedOptions{
+		CheckpointID: cpID,
+		SessionID:    "test-session-noupdate",
+		Strategy:     "manual-commit",
+		Transcript:   []byte(`{"type":"assistant","message":"original"}`),
+		Prompts:      []string{"old prompt"},
+		AuthorName:   "Test",
+		AuthorEmail:  "test@test.com",
+	})
+	require.NoError(t, err)
+
+	// Update with only prompts (no transcript)
+	err = store.UpdateCommitted(ctx, UpdateCommittedOptions{
+		CheckpointID: cpID,
+		SessionID:    "test-session-noupdate",
+		Prompts:      []string{"old prompt", "new prompt"},
+		Agent:        agent.AgentTypeClaudeCode,
+	})
+	require.NoError(t, err)
+
+	// /main should have updated prompts
+	mainTree := v2MainTree(t, repo)
+	promptContent := v2ReadFile(t, mainTree, cpID.Path()+"/0/"+paths.PromptFileName)
+	assert.Contains(t, promptContent, "new prompt")
+
+	// /full/current should still have original transcript (not replaced)
+	fullTree := v2FullTree(t, repo)
+	content := v2ReadFile(t, fullTree, cpID.Path()+"/0/"+paths.TranscriptFileName)
+	assert.Contains(t, content, "original")
+}
+
+// TestV2GitStore_UpdateCommitted_CheckpointNotFound calls UpdateCommitted for a checkpoint
+// ID that was never written and pins the documented sentinel, ErrCheckpointNotFound.
+func TestV2GitStore_UpdateCommitted_CheckpointNotFound(t *testing.T) {
+	t.Parallel()
+	repo := initTestRepo(t)
+	store := NewV2GitStore(repo)
+	ctx := context.Background()
+
+	cpID := id.MustCheckpointID("bb44cc55dd66")
+
+	// Update without prior write should return error
+	err := store.UpdateCommitted(ctx, UpdateCommittedOptions{
+		CheckpointID: cpID,
+		SessionID:    "nonexistent",
+		Transcript:   []byte(`{"type":"assistant","message":"hello"}`),
+		Agent:        agent.AgentTypeClaudeCode,
+	})
+	// ErrorIs rather than Error: UpdateCommitted wraps the /main failure with %w, so the
+	// sentinel stays matchable, and an unrelated failure no longer satisfies the test.
+	require.ErrorIs(t, err, ErrCheckpointNotFound)
+}
diff --git a/cmd/entire/cli/integration_test/testenv.go b/cmd/entire/cli/integration_test/testenv.go
index c2fee1e62..4cb94636a 100644
--- a/cmd/entire/cli/integration_test/testenv.go
+++ b/cmd/entire/cli/integration_test/testenv.go
@@ -854,6 +854,59 @@ func (env *TestEnv) ReadFileFromBranch(branchName, filePath string) (string, boo
 	return content, true
 }
 
+// ReadFileFromRef reads a file's content from a specific ref's tree.
+// Unlike ReadFileFromBranch, this takes a full ref name (e.g., "refs/entire/checkpoints/v2/main")
+// and does not prepend "refs/heads/".
+// Returns the content and true if found, empty string and false if not found.
+func (env *TestEnv) ReadFileFromRef(refName, filePath string) (string, bool) {
+	env.T.Helper()
+
+	repo, err := git.PlainOpen(env.RepoDir)
+	if err != nil {
+		env.T.Fatalf("failed to open git repo: %v", err)
+	}
+
+	ref, err := repo.Reference(plumbing.ReferenceName(refName), true)
+	if err != nil {
+		return "", false
+	}
+
+	commit, err := repo.CommitObject(ref.Hash())
+	if err != nil {
+		return "", false
+	}
+
+	tree, err := commit.Tree()
+	if err != nil {
+		return "", false
+	}
+
+	file, err := tree.File(filePath)
+	if err != nil {
+		return "", false
+	}
+
+	content, err := file.Contents()
+	if err != nil {
+		return "", false
+	}
+
+	return content, true
+}
+
+// RefExists checks if a ref exists in the repository.
+func (env *TestEnv) RefExists(refName string) bool {
+	env.T.Helper()
+
+	repo, err := git.PlainOpen(env.RepoDir)
+	if err != nil {
+		env.T.Fatalf("failed to open git repo: %v", err)
+	}
+
+	_, err = repo.Reference(plumbing.ReferenceName(refName), true)
+	return err == nil
+}
+
 // GetLatestCommitMessageOnBranch returns the commit message of the latest commit on the given branch.
 func (env *TestEnv) GetLatestCommitMessageOnBranch(branchName string) string {
 	env.T.Helper()
diff --git a/cmd/entire/cli/integration_test/v2_dual_write_test.go b/cmd/entire/cli/integration_test/v2_dual_write_test.go
new file mode 100644
index 000000000..714ba6c98
--- /dev/null
+++ b/cmd/entire/cli/integration_test/v2_dual_write_test.go
@@ -0,0 +1,230 @@
+//go:build integration
+
+package integration
+
+import (
+	"encoding/json"
+	"strings"
+	"testing"
+
+	"github.com/entireio/cli/cmd/entire/cli/checkpoint"
+	"github.com/entireio/cli/cmd/entire/cli/checkpoint/id"
+	"github.com/entireio/cli/cmd/entire/cli/paths"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestV2DualWrite_FullWorkflow verifies that when checkpoints_v2 is enabled,
+// a full session workflow (prompt → stop → commit) writes checkpoint data
+// to both v1 and v2 refs.
+func TestV2DualWrite_FullWorkflow(t *testing.T) {
+	t.Parallel()
+	env := NewTestEnv(t)
+	defer env.Cleanup()
+
+	env.InitRepo()
+	env.WriteFile("README.md", "# Test")
+	env.WriteFile(".gitignore", ".entire/\n")
+	env.GitAdd("README.md")
+	env.GitAdd(".gitignore")
+	env.GitCommit("Initial commit")
+	env.GitCheckoutNewBranch("feature/v2-test")
+
+	// Initialize with checkpoints_v2 enabled
+	env.InitEntireWithOptions(map[string]any{
+		"checkpoints_v2": true,
+	})
+
+	// Start session
+	session := env.NewSession()
+	err := env.SimulateUserPromptSubmitWithPrompt(session.ID, "Add greeting function")
+	require.NoError(t, err)
+
+	// Create a file and transcript
+	env.WriteFile("greet.go", "package main\n\nfunc Greet() string { return \"hello\" }")
+	session.CreateTranscript(
+		"Add greeting function",
+		[]FileChange{{Path: "greet.go", Content: "package main\n\nfunc Greet() string { return \"hello\" }"}},
+	)
+	err = env.SimulateStop(session.ID, session.TranscriptPath)
+	require.NoError(t, err)
+
+	// User commits (triggers prepare-commit-msg + post-commit → condensation)
+	env.GitCommitWithShadowHooks("Add greeting function", "greet.go")
+
+	// Get checkpoint ID from commit trailer
+	cpIDStr := env.GetLatestCheckpointIDFromHistory()
+	require.NotEmpty(t, cpIDStr, "checkpoint ID should be in commit trailer")
+
+	cpID, err := id.NewCheckpointID(cpIDStr)
+	require.NoError(t, err)
+	cpPath := cpID.Path()
+
+	// ========================================
+	// Verify v1 branch (existing behavior)
+	// ========================================
+	assert.True(t, env.BranchExists(paths.MetadataBranchName),
+		"v1 metadata branch should exist")
+
+	v1Summary, found := env.ReadFileFromBranch(paths.MetadataBranchName, cpPath+"/"+paths.MetadataFileName)
+	require.True(t, found, "v1 root metadata.json should exist")
+	assert.Contains(t, v1Summary, cpIDStr)
+
+	// ========================================
+	// Verify v2 /main ref
+	// ========================================
+	assert.True(t, env.RefExists(paths.V2MainRefName),
+		"v2 /main ref should exist")
+
+	// Root CheckpointSummary
+	mainSummary, found := env.ReadFileFromRef(paths.V2MainRefName, cpPath+"/"+paths.MetadataFileName)
+	require.True(t, found, "v2 /main root metadata.json should exist")
+
+	var summary checkpoint.CheckpointSummary
+	require.NoError(t, json.Unmarshal([]byte(mainSummary), &summary))
+	assert.Equal(t, cpID, summary.CheckpointID)
+	assert.Len(t, summary.Sessions, 1)
+
+	// Session metadata
+	mainSessionMeta, found := env.ReadFileFromRef(paths.V2MainRefName, cpPath+"/0/"+paths.MetadataFileName)
+	require.True(t, found, "v2 /main session metadata.json should exist")
+	assert.Contains(t, mainSessionMeta, session.ID)
+
+	// Prompts
+	mainPrompts, found := env.ReadFileFromRef(paths.V2MainRefName, cpPath+"/0/"+paths.PromptFileName)
+	require.True(t, found, "v2 /main prompt.txt should exist")
+	assert.Contains(t, mainPrompts, "Add greeting function")
+
+	// Transcript should NOT be on /main
+	_, found = env.ReadFileFromRef(paths.V2MainRefName, cpPath+"/0/"+paths.TranscriptFileName)
+	assert.False(t, found, "full.jsonl should NOT be on v2 /main")
+
+	// ========================================
+	// Verify v2 /full/current ref
+	// ========================================
+	assert.True(t, env.RefExists(paths.V2FullCurrentRefName),
+		"v2 /full/current ref should exist")
+
+	// Transcript should be on /full/current
+	fullTranscript, found := env.ReadFileFromRef(paths.V2FullCurrentRefName, cpPath+"/0/"+paths.TranscriptFileName)
+	require.True(t, found, "full.jsonl should exist on v2 /full/current")
+	assert.Contains(t, fullTranscript, "Greet")
+
+	// Content hash should be co-located with transcript
+	fullHash, found := env.ReadFileFromRef(paths.V2FullCurrentRefName, cpPath+"/0/"+paths.ContentHashFileName)
+	require.True(t, found, "content_hash.txt should exist on v2 /full/current")
+	assert.True(t, strings.HasPrefix(fullHash, "sha256:"))
+
+	// Metadata should NOT be on /full/current
+	_, found = env.ReadFileFromRef(paths.V2FullCurrentRefName, cpPath+"/0/"+paths.MetadataFileName)
+	assert.False(t, found, "metadata.json should NOT be on v2 /full/current")
+}
+
+// TestV2DualWrite_Disabled verifies that when checkpoints_v2 is NOT enabled,
+// no v2 refs are created.
+func TestV2DualWrite_Disabled(t *testing.T) {
+	t.Parallel()
+
+	// Fixture values used more than once below.
+	const (
+		prompt     = "Add helper"
+		commitMsg  = "Add helper"
+		helperPath = "helper.go"
+		helperSrc  = "package main\n\nfunc Helper() {}"
+	)
+
+	env := NewTestEnv(t)
+	defer env.Cleanup()
+
+	// Seed a repository with one commit, then move onto a feature branch.
+	env.InitRepo()
+	env.WriteFile("README.md", "# Test")
+	env.WriteFile(".gitignore", ".entire/\n")
+	env.GitAdd("README.md")
+	env.GitAdd(".gitignore")
+	env.GitCommit("Initial commit")
+	env.GitCheckoutNewBranch("feature/v2-disabled")
+
+	// Plain initialization: the checkpoints_v2 option is never passed.
+	env.InitEntire()
+
+	// One full turn: prompt, file change with transcript, stop.
+	sess := env.NewSession()
+	require.NoError(t, env.SimulateUserPromptSubmitWithPrompt(sess.ID, prompt))
+
+	env.WriteFile(helperPath, helperSrc)
+	sess.CreateTranscript(prompt, []FileChange{{Path: helperPath, Content: helperSrc}})
+	require.NoError(t, env.SimulateStop(sess.ID, sess.TranscriptPath))
+
+	env.GitCommitWithShadowHooks(commitMsg, helperPath)
+
+	// The v1 branch must still be written.
+	assert.True(t, env.BranchExists(paths.MetadataBranchName),
+		"v1 metadata branch should exist")
+
+	// Neither v2 ref may have been created.
+	absent := []struct{ ref, msg string }{
+		{paths.V2MainRefName, "v2 /main ref should NOT exist when v2 is disabled"},
+		{paths.V2FullCurrentRefName, "v2 /full/current ref should NOT exist when v2 is disabled"},
+	}
+	for _, want := range absent {
+		assert.False(t, env.RefExists(want.ref), want.msg)
+	}
+}
+
+// TestV2DualWrite_StopTimeFinalization verifies that stop-time transcript
+// finalization also updates v2 refs when checkpoints_v2 is enabled.
+func TestV2DualWrite_StopTimeFinalization(t *testing.T) {
+	t.Parallel()
+	env := NewTestEnv(t)
+	defer env.Cleanup()
+
+	env.InitRepo()
+	env.WriteFile("README.md", "# Test")
+	env.WriteFile(".gitignore", ".entire/\n")
+	env.GitAdd("README.md")
+	env.GitAdd(".gitignore")
+	env.GitCommit("Initial commit")
+	env.GitCheckoutNewBranch("feature/v2-finalize")
+
+	env.InitEntireWithOptions(map[string]any{
+		"checkpoints_v2": true,
+	})
+
+	// Start session and create first checkpoint
+	session := env.NewSession()
+	err := env.SimulateUserPromptSubmitWithPrompt(session.ID, "Create main file")
+	require.NoError(t, err)
+
+	env.WriteFile("main.go", "package main\n\nfunc main() {}")
+	session.CreateTranscript(
+		"Create main file",
+		[]FileChange{{Path: "main.go", Content: "package main\n\nfunc main() {}"}},
+	)
+	err = env.SimulateStop(session.ID, session.TranscriptPath)
+	require.NoError(t, err)
+
+	// Mid-session commit (checkpoint condensed, but transcript is provisional)
+	env.GitCommitWithShadowHooks("Add main.go", "main.go")
+
+	// Capture the checkpoint path now; the later read targets this same checkpoint.
+	cpIDStr := env.GetLatestCheckpointIDFromHistory()
+	require.NotEmpty(t, cpIDStr)
+
+	cpID, err := id.NewCheckpointID(cpIDStr)
+	require.NoError(t, err)
+	cpPath := cpID.Path()
+
+	// Continue session with more work
+	err = env.SimulateUserPromptSubmitWithPrompt(session.ID, "Add tests")
+	require.NoError(t, err)
+
+	env.WriteFile("main_test.go", "package main\n\nimport \"testing\"\n\nfunc TestMain(t *testing.T) {}")
+	// Rebuild transcript with both turns (CreateTranscript replaces)
+	session.CreateTranscript(
+		"Add tests",
+		[]FileChange{
+			{Path: "main.go", Content: "package main\n\nfunc main() {}"},
+			{Path: "main_test.go", Content: "package main\n\nimport \"testing\"\n\nfunc TestMain(t *testing.T) {}"},
+		},
+	)
+
+	// Stop finalizes the transcript for all turn checkpoints
+	err = env.SimulateStop(session.ID, session.TranscriptPath)
+	require.NoError(t, err)
+
+	// After stop-time finalization, /full/current should have the finalized transcript
+	fullTranscript, found := env.ReadFileFromRef(paths.V2FullCurrentRefName, cpPath+"/0/"+paths.TranscriptFileName)
+	require.True(t, found, "full.jsonl should exist on /full/current after finalization")
+	// NOTE(review): "main" was already part of the first turn's inputs (main.go,
+	// "package main"), so presumably this passes on the provisional transcript too and
+	// does not prove finalization ran. Consider asserting on second-turn-only content
+	// (e.g. "TestMain") once it is confirmed that finalization covers this checkpoint.
+	assert.Contains(t, fullTranscript, "main")
+}
diff --git a/cmd/entire/cli/paths/paths.go b/cmd/entire/cli/paths/paths.go
index 98706ba2e..80d3e8761 100644
--- a/cmd/entire/cli/paths/paths.go
+++ b/cmd/entire/cli/paths/paths.go
@@ -34,6 +34,16 @@ const (
 // MetadataBranchName is the orphan branch used by manual-commit strategy to store metadata
 const MetadataBranchName = "entire/checkpoints/v1"
 
+// V2 ref names use custom refs under refs/entire/ (not refs/heads/).
+// These are invisible in GitHub's branch UI and not fetched by default.
+const (
+	// V2MainRefName stores permanent metadata + compact transcripts.
+	V2MainRefName = "refs/entire/checkpoints/v2/main"
+
+	// V2FullCurrentRefName stores the active generation of raw transcripts.
+	V2FullCurrentRefName = "refs/entire/checkpoints/v2/full/current"
+)
+
 // TrailsBranchName is the orphan branch used to store trail metadata.
 // Trails are branch-centric work tracking abstractions that link to checkpoints by branch name.
const TrailsBranchName = "entire/trails/v1"
diff --git a/cmd/entire/cli/strategy/manual_commit_condensation.go b/cmd/entire/cli/strategy/manual_commit_condensation.go
index 608482151..275edf0e4 100644
--- a/cmd/entire/cli/strategy/manual_commit_condensation.go
+++ b/cmd/entire/cli/strategy/manual_commit_condensation.go
@@ -243,8 +243,8 @@ func (s *ManualCommitStrategy) CondenseSession(ctx context.Context, repo *git.Re
 		}
 	}
 
-	// Write checkpoint metadata using the checkpoint store
-	if err := store.WriteCommitted(ctx, cpkg.WriteCommittedOptions{
+	// Build write options (shared by v1 and v2)
+	writeOpts := cpkg.WriteCommittedOptions{
 		CheckpointID: checkpointID,
 		SessionID:    state.SessionID,
 		Strategy:     StrategyNameManualCommit,
@@ -265,10 +265,16 @@ func (s *ManualCommitStrategy) CondenseSession(ctx context.Context, repo *git.Re
 		SessionMetrics:     buildSessionMetrics(state),
 		InitialAttribution: attribution,
 		Summary:            summary,
-	}); err != nil {
+	}
+
+	// Write checkpoint metadata to v1 branch
+	if err := store.WriteCommitted(ctx, writeOpts); err != nil {
 		return nil, fmt.Errorf("failed to write checkpoint metadata: %w", err)
 	}
 
+	// Dual-write to v2 refs when enabled
+	writeCommittedV2IfEnabled(ctx, repo, writeOpts)
+
 	return &CondenseResult{
 		CheckpointID: checkpointID,
 		SessionID:    state.SessionID,
@@ -907,3 +913,20 @@ func (s *ManualCommitStrategy) cleanupShadowBranchIfUnused(ctx context.Context,
 	}
 	return nil
 }
+
+// writeCommittedV2IfEnabled writes checkpoint data to v2 refs when checkpoints_v2
+// is enabled in settings. Failures are logged as warnings — v2 writes are
+// best-effort during the dual-write period and must not block the v1 path.
+func writeCommittedV2IfEnabled(ctx context.Context, repo *git.Repository, opts cpkg.WriteCommittedOptions) {
+	if !settings.IsCheckpointsV2Enabled(ctx) {
+		return
+	}
+
+	v2Store := cpkg.NewV2GitStore(repo)
+	// The error is logged and dropped on purpose: this function has no return value.
+	if err := v2Store.WriteCommitted(ctx, opts); err != nil {
+		logging.Warn(ctx, "v2 dual-write failed",
+			slog.String("checkpoint_id", opts.CheckpointID.String()),
+			slog.String("error", err.Error()),
+		)
+	}
+}
diff --git a/cmd/entire/cli/strategy/manual_commit_hooks.go b/cmd/entire/cli/strategy/manual_commit_hooks.go
index c90c3bd6e..792a52d41 100644
--- a/cmd/entire/cli/strategy/manual_commit_hooks.go
+++ b/cmd/entire/cli/strategy/manual_commit_hooks.go
@@ -2264,6 +2264,12 @@ func (s *ManualCommitStrategy) finalizeAllTurnCheckpoints(ctx context.Context, s
 
 	store := checkpoint.NewGitStore(repo)
 
+	// Evaluate v2 flag once before the loop to avoid re-reading settings per checkpoint
+	var v2Store *checkpoint.V2GitStore
+	if settings.IsCheckpointsV2Enabled(logCtx) {
+		v2Store = checkpoint.NewV2GitStore(repo)
+	}
+
 	// Update each checkpoint with the full transcript
 	for _, cpIDStr := range state.TurnCheckpointIDs {
 		cpID, parseErr := id.NewCheckpointID(cpIDStr)
@@ -2276,13 +2282,15 @@ func (s *ManualCommitStrategy) finalizeAllTurnCheckpoints(ctx context.Context, s
 			continue
 		}
 
-		updateErr := store.UpdateCommitted(ctx, checkpoint.UpdateCommittedOptions{
+		updateOpts := checkpoint.UpdateCommittedOptions{
 			CheckpointID: cpID,
 			SessionID:    state.SessionID,
 			Transcript:   fullTranscript,
 			Prompts:      prompts,
 			Agent:        state.AgentType,
-		})
+		}
+
+		updateErr := store.UpdateCommitted(ctx, updateOpts)
 		if updateErr != nil {
 			logging.Warn(logCtx, "finalize: failed to update checkpoint",
 				slog.String("checkpoint_id", cpIDStr),
@@ -2292,6 +2300,16 @@ func (s *ManualCommitStrategy) finalizeAllTurnCheckpoints(ctx context.Context, s
 			continue
 		}
 
+		// Dual-write: update v2 refs when enabled
+		// NOTE(review): the v1 update above is called with ctx while this call passes
+		// logCtx — confirm the difference is intentional.
+		if v2Store != nil {
+			if v2Err := v2Store.UpdateCommitted(logCtx, updateOpts); v2Err != nil {
+				logging.Warn(logCtx, "v2 dual-write update failed",
+					slog.String("checkpoint_id", cpIDStr),
+					slog.String("error", v2Err.Error()),
+				)
+			}
+		}
+
 		logging.Info(logCtx, "finalize: checkpoint updated with full transcript",
 			slog.String("checkpoint_id", cpIDStr),
 			slog.String("session_id", state.SessionID),
diff --git a/cmd/entire/cli/strategy/manual_commit_test.go b/cmd/entire/cli/strategy/manual_commit_test.go
index 4b4e9294e..5f4102b16 100644
--- a/cmd/entire/cli/strategy/manual_commit_test.go
+++ b/cmd/entire/cli/strategy/manual_commit_test.go
@@ -3875,3 +3875,179 @@ func TestResolveFilesTouched_PrefersStateFallsBackToTranscript(t *testing.T) {
 		}
 	})
 }
+
+// TestCondenseSession_V2DualWrite verifies that when checkpoints_v2 is enabled,
+// CondenseSession writes to both v1 (entire/checkpoints/v1) and v2 refs
+// (refs/entire/checkpoints/v2/main and refs/entire/checkpoints/v2/full/current).
+func TestCondenseSession_V2DualWrite(t *testing.T) {
+	dir := t.TempDir()
+	repo, err := git.PlainInit(dir, false)
+	require.NoError(t, err)
+
+	worktree, err := repo.Worktree()
+	require.NoError(t, err)
+
+	require.NoError(t, os.WriteFile(filepath.Join(dir, "main.go"), []byte("package main"), 0o644))
+	_, err = worktree.Add("main.go")
+	require.NoError(t, err)
+	commitHash, err := worktree.Commit("Initial commit", &git.CommitOptions{
+		Author: &object.Signature{Name: "Test", Email: "test@test.com", When: time.Now()},
+	})
+	require.NoError(t, err)
+
+	// t.Chdir changes the working directory of the whole process, which is why this
+	// test does not call t.Parallel.
+	t.Chdir(dir)
+
+	// Enable checkpoints_v2 via settings
+	entireDir := filepath.Join(dir, ".entire")
+	require.NoError(t, os.MkdirAll(entireDir, 0o755))
+	settingsJSON := `{"enabled": true, "strategy": "manual-commit", "strategy_options": {"checkpoints_v2": true}}`
+	require.NoError(t, os.WriteFile(filepath.Join(entireDir, "settings.json"), []byte(settingsJSON), 0o644))
+
+	s := &ManualCommitStrategy{}
+	sessionID := "2025-01-15-test-v2-dual-write"
+
+	// Create metadata directory with transcript
+	metadataDir := ".entire/metadata/" + sessionID
+	metadataDirAbs := filepath.Join(dir, metadataDir)
+	require.NoError(t, os.MkdirAll(metadataDirAbs, 0o755))
+
+	transcript := `{"type":"human","message":{"content":"hello"}}
+{"type":"assistant","message":{"content":"hi there"}}
+`
+	require.NoError(t, os.WriteFile(filepath.Join(metadataDirAbs, paths.TranscriptFileName), []byte(transcript), 0o644))
+
+	// SaveStep to create shadow branch
+	err = s.SaveStep(context.Background(), StepContext{
+		SessionID:      sessionID,
+		ModifiedFiles:  []string{"main.go"},
+		MetadataDir:    metadataDir,
+		MetadataDirAbs: metadataDirAbs,
+		CommitMessage:  "Checkpoint 1",
+		AuthorName:     "Test",
+		AuthorEmail:    "test@test.com",
+	})
+	require.NoError(t, err)
+
+	state, err := s.loadSessionState(context.Background(), sessionID)
+	require.NoError(t, err)
+	state.TranscriptPath = filepath.Join(metadataDirAbs, paths.TranscriptFileName)
+	// BaseCommit is set to the first 7 characters of the initial commit hash.
+	state.BaseCommit = commitHash.String()[:7]
+
+	checkpointID := id.MustCheckpointID("dd11ee22ff33")
+	result, err := s.CondenseSession(context.Background(), repo, checkpointID, state, nil)
+	require.NoError(t, err)
+	require.NotNil(t, result)
+
+	// v1 branch should exist (as before)
+	v1Ref, err := repo.Reference(plumbing.NewBranchReferenceName(paths.MetadataBranchName), true)
+	require.NoError(t, err, "v1 metadata branch should exist")
+	require.NotEqual(t, plumbing.ZeroHash, v1Ref.Hash())
+
+	// v2 /main ref should exist
+	v2MainRef, err := repo.Reference(plumbing.ReferenceName(paths.V2MainRefName), true)
+	require.NoError(t, err, "v2 /main ref should exist")
+	require.NotEqual(t, plumbing.ZeroHash, v2MainRef.Hash())
+
+	// v2 /full/current ref should exist (transcript was non-empty)
+	v2FullRef, err := repo.Reference(plumbing.ReferenceName(paths.V2FullCurrentRefName), true)
+	require.NoError(t, err, "v2 /full/current ref should exist")
+	require.NotEqual(t, plumbing.ZeroHash, v2FullRef.Hash())
+
+	// Verify /main has metadata but no transcript
+	// NOTE(review): only the presence of the root metadata file is asserted below; the
+	// absence of a transcript on /main is not checked in this test.
+	v2MainCommit, err := repo.CommitObject(v2MainRef.Hash())
+	require.NoError(t, err)
+	v2MainTree, err := v2MainCommit.Tree()
+	require.NoError(t, err)
+
+	cpPath := checkpointID.Path()
+	mainCpTree, err := v2MainTree.Tree(cpPath)
+	require.NoError(t, err)
+
+	// Root metadata.json should exist
+	_, err = mainCpTree.File(paths.MetadataFileName)
+	require.NoError(t, err, "root metadata.json should exist on /main")
+
+	// Verify /full/current has transcript
+	v2FullCommit, err := repo.CommitObject(v2FullRef.Hash())
+	require.NoError(t, err)
+	v2FullTree, err := v2FullCommit.Tree()
+	require.NoError(t, err)
+
+	fullCpTree, err := v2FullTree.Tree(cpPath)
+	require.NoError(t, err)
+	fullSessionTree, err := fullCpTree.Tree("0")
+	require.NoError(t, err)
+	_, err = fullSessionTree.File(paths.TranscriptFileName)
+	require.NoError(t, err, "full.jsonl should exist on /full/current")
+}
+
+// TestCondenseSession_V2Disabled_NoV2Refs verifies that when checkpoints_v2 is
+// not enabled, CondenseSession only writes to v1 and does not create v2 refs.
+func TestCondenseSession_V2Disabled_NoV2Refs(t *testing.T) {
+	dir := t.TempDir()
+	repo, err := git.PlainInit(dir, false)
+	require.NoError(t, err)
+
+	worktree, err := repo.Worktree()
+	require.NoError(t, err)
+
+	require.NoError(t, os.WriteFile(filepath.Join(dir, "main.go"), []byte("package main"), 0o644))
+	_, err = worktree.Add("main.go")
+	require.NoError(t, err)
+	commitHash, err := worktree.Commit("Initial commit", &git.CommitOptions{
+		Author: &object.Signature{Name: "Test", Email: "test@test.com", When: time.Now()},
+	})
+	require.NoError(t, err)
+
+	// t.Chdir changes the working directory of the whole process, which is why this
+	// test does not call t.Parallel.
+	t.Chdir(dir)
+
+	// No checkpoints_v2 setting — default is disabled
+	entireDir := filepath.Join(dir, ".entire")
+	require.NoError(t, os.MkdirAll(entireDir, 0o755))
+	settingsJSON := `{"enabled": true, "strategy": "manual-commit"}`
+	require.NoError(t, os.WriteFile(filepath.Join(entireDir, "settings.json"), []byte(settingsJSON), 0o644))
+
+	s := &ManualCommitStrategy{}
+	sessionID := "2025-01-15-test-v2-disabled"
+
+	metadataDir := ".entire/metadata/" + sessionID
+	metadataDirAbs := filepath.Join(dir, metadataDir)
+	require.NoError(t, os.MkdirAll(metadataDirAbs, 0o755))
+
+	transcript := `{"type":"human","message":{"content":"hello"}}
+{"type":"assistant","message":{"content":"hi"}}
+`
+	require.NoError(t, os.WriteFile(filepath.Join(metadataDirAbs, paths.TranscriptFileName), []byte(transcript), 0o644))
+
+	err = s.SaveStep(context.Background(), StepContext{
+		SessionID:      sessionID,
+		ModifiedFiles:  []string{"main.go"},
+		MetadataDir:    metadataDir,
+		MetadataDirAbs: metadataDirAbs,
+		CommitMessage:  "Checkpoint 1",
+		AuthorName:     "Test",
+		AuthorEmail:    "test@test.com",
+	})
+	require.NoError(t, err)
+
+	state, err := s.loadSessionState(context.Background(), sessionID)
+	require.NoError(t, err)
+	state.TranscriptPath = filepath.Join(metadataDirAbs, paths.TranscriptFileName)
+	state.BaseCommit = commitHash.String()[:7]
+
+	checkpointID := id.MustCheckpointID("ee22ff33aa44")
+	result, err := s.CondenseSession(context.Background(), repo, checkpointID, state, nil)
+	require.NoError(t, err)
+	require.NotNil(t, result)
+
+	// v1 should exist
+	_, err = repo.Reference(plumbing.NewBranchReferenceName(paths.MetadataBranchName), true)
+	require.NoError(t, err, "v1 metadata branch should exist")
+
+	// v2 refs should NOT exist
+	_, err = repo.Reference(plumbing.ReferenceName(paths.V2MainRefName), true)
+	require.Error(t, err, "v2 /main ref should not exist when v2 is disabled")
+
+	_, err = repo.Reference(plumbing.ReferenceName(paths.V2FullCurrentRefName), true)
+	require.Error(t, err, "v2 /full/current ref should not exist when v2 is disabled")
+}