From 5ca7c36ea892bdc764ff47d521f55a9750c1d305 Mon Sep 17 00:00:00 2001 From: Stefan Haubold Date: Mon, 23 Mar 2026 14:15:56 +0100 Subject: [PATCH 1/6] add session log filtering Entire-Checkpoint: 3e14bfef5a8b --- cmd/entire/cli/checkpoint/committed.go | 33 +++- cmd/entire/cli/checkpoint/temporary.go | 15 +- cmd/entire/cli/filter/context.go | 45 +++++ cmd/entire/cli/filter/filter.go | 117 ++++++++++++ cmd/entire/cli/filter/filter_test.go | 176 ++++++++++++++++++ cmd/entire/cli/filter/validate.go | 54 ++++++ cmd/entire/cli/filter/validate_test.go | 85 +++++++++ cmd/entire/cli/settings/settings.go | 21 +++ .../cli/strategy/manual_commit_rewind.go | 8 +- 9 files changed, 538 insertions(+), 16 deletions(-) create mode 100644 cmd/entire/cli/filter/context.go create mode 100644 cmd/entire/cli/filter/filter.go create mode 100644 cmd/entire/cli/filter/filter_test.go create mode 100644 cmd/entire/cli/filter/validate.go create mode 100644 cmd/entire/cli/filter/validate_test.go diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index 47a75beb8..60547ce90 100644 --- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -18,6 +18,7 @@ import ( "github.com/entireio/cli/cmd/entire/cli/agent" "github.com/entireio/cli/cmd/entire/cli/agent/types" "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" + "github.com/entireio/cli/cmd/entire/cli/filter" "github.com/entireio/cli/cmd/entire/cli/jsonutil" "github.com/entireio/cli/cmd/entire/cli/logging" "github.com/entireio/cli/cmd/entire/cli/paths" @@ -333,6 +334,9 @@ func (s *GitStore) writeStandardCheckpointEntries(ctx context.Context, opts Writ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCommittedOptions, sessionPath string, entries map[string]object.TreeEntry) (SessionFilePaths, error) { filePaths := SessionFilePaths{} + // Construct filter pipeline once for both transcript and prompts + pipeline := filter.FromContext(ctx) + // Clear any existing entries at this path so stale files from a previous // write (e.g. prompt.txt) don't persist on overwrite. for key := range entries { @@ -342,7 +346,7 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom } // Write transcript - if err := s.writeTranscript(ctx, opts, sessionPath, entries); err != nil { + if err := s.writeTranscript(ctx, opts, sessionPath, entries, pipeline); err != nil { return filePaths, err } filePaths.Transcript = "/" + sessionPath + paths.TranscriptFileName @@ -350,7 +354,7 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom // Write prompts if len(opts.Prompts) > 0 { - promptContent := redact.String(strings.Join(opts.Prompts, "\n\n---\n\n")) + promptContent := pipeline.CleanString(redact.String(strings.Join(opts.Prompts, "\n\n---\n\n"))) blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent)) if err != nil { return filePaths, err @@ -544,7 +548,8 @@ func aggregateTokenUsage(a, b *agent.TokenUsage) *agent.TokenUsage { // writeTranscript writes the transcript file from in-memory content or file path. // If the transcript exceeds MaxChunkSize, it's split into multiple chunk files. -func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptions, basePath string, entries map[string]object.TreeEntry) error { +// pipeline may be nil (no-op filtering). +func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptions, basePath string, entries map[string]object.TreeEntry, pipeline *filter.Pipeline) error { transcript := opts.Transcript if len(transcript) == 0 && opts.TranscriptPath != "" { var readErr error @@ -558,11 +563,12 @@ func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptio return nil } - // Redact secrets before chunking so content hash reflects redacted content + // Redact secrets then normalize machine-specific paths transcript, err := redact.JSONLBytes(transcript) if err != nil { return fmt.Errorf("failed to redact transcript secrets: %w", err) } + transcript = pipeline.Clean(transcript) // Chunk the transcript if it's too large chunks, err := agent.ChunkTranscript(ctx, transcript, opts.Agent) @@ -1115,6 +1121,9 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti return errors.New("invalid update options: checkpoint ID is required") } + // Construct filter pipeline once for both transcript and prompt updates + pipeline := filter.FromContext(ctx) + // Ensure sessions branch exists if err := s.ensureSessionsBranch(); err != nil { return fmt.Errorf("failed to ensure sessions branch: %w", err) @@ -1180,6 +1189,7 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti if err != nil { return fmt.Errorf("failed to redact transcript secrets: %w", err) } + transcript = pipeline.Clean(transcript) if err := s.replaceTranscript(ctx, transcript, opts.Agent, sessionPath, entries); err != nil { return fmt.Errorf("failed to replace transcript: %w", err) } @@ -1187,7 +1197,7 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti // Replace prompts (apply redaction as safety net) if len(opts.Prompts) > 0 { - promptContent := redact.String(strings.Join(opts.Prompts, "\n\n---\n\n")) + promptContent := pipeline.CleanString(redact.String(strings.Join(opts.Prompts, "\n\n---\n\n"))) blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent)) if err != nil { return fmt.Errorf("failed to create prompt blob: %w", err) @@ -1402,8 +1412,9 @@ func (s *GitStore) copyMetadataDir(metadataDir, basePath string, entries map[str return fmt.Errorf("path traversal detected: %s", relPath) } - // Create blob from file with secrets redaction - blobHash, mode, err := createRedactedBlobFromFile(s.repo, path, relPath) + // Create blob from file with secrets redaction (no transcript filter — + // copyMetadataDir is used for task checkpoint metadata, not transcripts) + blobHash, mode, err := createRedactedBlobFromFile(s.repo, path, relPath, nil) if err != nil { return fmt.Errorf("failed to create blob for %s: %w", path, err) } @@ -1424,9 +1435,11 @@ func (s *GitStore) copyMetadataDir(metadataDir, basePath string, entries map[str return nil } -// createRedactedBlobFromFile reads a file, applies secrets redaction, and creates a git blob. +// createRedactedBlobFromFile reads a file, applies secrets redaction and optional +// transcript filters, and creates a git blob. +// pipeline may be nil (no-op filtering). // JSONL files get JSONL-aware redaction; all other files get plain string redaction. -func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string) (plumbing.Hash, filemode.FileMode, error) { +func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string, pipeline *filter.Pipeline) (plumbing.Hash, filemode.FileMode, error) { info, err := os.Stat(filePath) if err != nil { return plumbing.ZeroHash, 0, fmt.Errorf("failed to stat file: %w", err) @@ -1463,6 +1476,8 @@ func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string) content = redact.Bytes(content) } + content = pipeline.Clean(content) + hash, err := CreateBlobFromContent(repo, content) if err != nil { return plumbing.ZeroHash, 0, fmt.Errorf("failed to create blob: %w", err) diff --git a/cmd/entire/cli/checkpoint/temporary.go b/cmd/entire/cli/checkpoint/temporary.go index b9442fe32..fd5acaf8f 100644 --- a/cmd/entire/cli/checkpoint/temporary.go +++ b/cmd/entire/cli/checkpoint/temporary.go @@ -17,6 +17,7 @@ import ( "github.com/entireio/cli/cmd/entire/cli/agent" "github.com/entireio/cli/cmd/entire/cli/agent/types" + "github.com/entireio/cli/cmd/entire/cli/filter" "github.com/entireio/cli/cmd/entire/cli/jsonutil" "github.com/entireio/cli/cmd/entire/cli/logging" "github.com/entireio/cli/cmd/entire/cli/paths" @@ -302,6 +303,9 @@ func (s *GitStore) addTaskMetadataToTree(ctx context.Context, baseTreeHash plumb sessionMetadataDir := paths.EntireMetadataDir + "/" + opts.SessionID taskMetadataDir := sessionMetadataDir + "/tasks/" + opts.ToolUseID + // Construct filter pipeline once for all data in this function + pipeline := filter.FromContext(ctx) + var changes []TreeChange if opts.IsIncremental { @@ -313,6 +317,7 @@ func (s *GitStore) addTaskMetadataToTree(ctx context.Context, baseTreeHash plumb if err != nil { return plumbing.ZeroHash, fmt.Errorf("failed to redact incremental checkpoint: %w", err) } + incData = pipeline.Clean(incData) } incrementalCheckpoint := struct { Type string `json:"type"` @@ -387,7 +392,7 @@ func (s *GitStore) addTaskMetadataToTree(ctx context.Context, baseTreeHash plumb ) redacted = redact.Bytes(agentContent) } - agentContent = redacted + agentContent = pipeline.Clean(redacted) if blobHash, blobErr := CreateBlobFromContent(s.repo, agentContent); blobErr == nil { agentPath := taskMetadataDir + "/agent-" + opts.AgentID + ".jsonl" changes = append(changes, TreeChange{ @@ -754,7 +759,7 @@ func (s *GitStore) buildTreeWithChanges( // Metadata directory files if metadataDir != "" && metadataDirAbs != "" { - metaChanges, metaErr := addDirectoryToChanges(s.repo, metadataDirAbs, metadataDir) + metaChanges, metaErr := addDirectoryToChanges(s.repo, metadataDirAbs, metadataDir, filter.FromContext(ctx)) if metaErr != nil { return plumbing.ZeroHash, fmt.Errorf("failed to add metadata directory: %w", metaErr) } @@ -899,7 +904,7 @@ func addDirectoryToEntriesWithAbsPath(repo *git.Repository, dirPathAbs, dirPathR // Use redacted blob creation for metadata files (transcripts, prompts, etc.) // to ensure PII and secrets are redacted before writing to git. - blobHash, mode, err := createRedactedBlobFromFile(repo, path, treePath) + blobHash, mode, err := createRedactedBlobFromFile(repo, path, treePath, nil) if err != nil { return fmt.Errorf("failed to create blob for %s: %w", path, err) } @@ -925,7 +930,7 @@ type treeNode struct { // addDirectoryToChanges walks a filesystem directory and returns TreeChange entries // for each file, suitable for use with ApplyTreeChanges. // dirPathAbs is the absolute filesystem path; dirPathRel is the git tree-relative path. -func addDirectoryToChanges(repo *git.Repository, dirPathAbs, dirPathRel string) ([]TreeChange, error) { +func addDirectoryToChanges(repo *git.Repository, dirPathAbs, dirPathRel string, pipeline *filter.Pipeline) ([]TreeChange, error) { var changes []TreeChange err := filepath.Walk(dirPathAbs, func(path string, info os.FileInfo, err error) error { if err != nil { @@ -958,7 +963,7 @@ func addDirectoryToChanges(repo *git.Repository, dirPathAbs, dirPathRel string) treePath := filepath.ToSlash(filepath.Join(dirPathRel, relWithinDir)) - blobHash, mode, blobErr := createRedactedBlobFromFile(repo, path, treePath) + blobHash, mode, blobErr := createRedactedBlobFromFile(repo, path, treePath, pipeline) if blobErr != nil { return fmt.Errorf("failed to create blob for %s: %w", path, blobErr) } diff --git a/cmd/entire/cli/filter/context.go b/cmd/entire/cli/filter/context.go new file mode 100644 index 000000000..a1406a218 --- /dev/null +++ b/cmd/entire/cli/filter/context.go @@ -0,0 +1,45 @@ +package filter + +import ( + "context" + "log/slog" + "os" + + "github.com/entireio/cli/cmd/entire/cli/logging" + "github.com/entireio/cli/cmd/entire/cli/paths" + "github.com/entireio/cli/cmd/entire/cli/settings" +) + +// FromContext constructs a Pipeline using the current repo root, home directory, +// and user-configured transcript filters from settings. +// On error, logs a warning and returns nil (nil *Pipeline is safe to use). +func FromContext(ctx context.Context) *Pipeline { + repoRoot, err := paths.WorktreeRoot(ctx) + if err != nil { + logging.Warn(ctx, "filter: failed to get repo root, transcript filtering disabled", + slog.String("error", err.Error())) + return nil + } + + homeDir, err := os.UserHomeDir() + if err != nil { + logging.Warn(ctx, "filter: failed to get home directory, transcript filtering disabled", + slog.String("error", err.Error())) + return nil + } + + s, err := settings.Load(ctx) + if err != nil { + logging.Warn(ctx, "filter: failed to load settings, transcript filtering disabled", + slog.String("error", err.Error())) + return nil + } + + p, err := NewPipeline(repoRoot, homeDir, s.TranscriptFilters) + if err != nil { + logging.Warn(ctx, "filter: failed to build pipeline, transcript filtering disabled", + slog.String("error", err.Error())) + return nil + } + return p +} diff --git a/cmd/entire/cli/filter/filter.go b/cmd/entire/cli/filter/filter.go new file mode 100644 index 000000000..d3bffe1c9 --- /dev/null +++ b/cmd/entire/cli/filter/filter.go @@ -0,0 +1,117 @@ +// Package filter provides clean/smudge transcript filtering for path normalization. +// Transcripts stored in checkpoints contain absolute paths (repo root, home directory) +// which are machine-specific. This package normalizes paths on store (clean) and +// restores them on read (smudge), similar to git's clean/smudge filter pipeline. +package filter + +import ( + "bytes" + + "github.com/entireio/cli/cmd/entire/cli/settings" +) + +// Filter defines a single find-and-replace pair used during clean/smudge. +// Clean replaces Match with Replace; Smudge reverses the substitution. +type Filter struct { + Match string // literal string to find + Replace string // literal string to substitute +} + +// Clean applies the filter in the "store" direction: Match → Replace. +func (f Filter) Clean(data []byte) []byte { + return bytes.ReplaceAll(data, []byte(f.Match), []byte(f.Replace)) +} + +// Smudge applies the filter in the "restore" direction: Replace → Match. +func (f Filter) Smudge(data []byte) []byte { + return bytes.ReplaceAll(data, []byte(f.Replace), []byte(f.Match)) +} + +// Pipeline holds an ordered list of filters and applies them collectively. +// A nil *Pipeline is safe to use — all methods are no-ops. +type Pipeline struct { + filters []Filter +} + +// NewPipeline creates a filter pipeline with built-in path filters and optional user filters. +// Built-in filters (applied first, most-specific first): +// 1. repoRoot → __ent__/repo +// 2. homeDir → __ent__/home +// +// User filters are appended after built-ins. +func NewPipeline(repoRoot, homeDir string, userFilters []settings.TranscriptFilter) (*Pipeline, error) { + var filters []Filter + + // Built-in filters, most-specific first (repo root before home dir, + // since repo root is typically under home dir). + if repoRoot != "" { + f := Filter{Match: repoRoot, Replace: "__ent__/repo"} + if err := ValidateFilter(f, true); err != nil { + return nil, err + } + filters = append(filters, f) + } + if homeDir != "" { + f := Filter{Match: homeDir, Replace: "__ent__/home"} + if err := ValidateFilter(f, true); err != nil { + return nil, err + } + filters = append(filters, f) + } + + // User filters + for _, uf := range userFilters { + f := Filter{ + Match: uf.Match, + Replace: "__ent_user__/" + uf.Key, + } + if err := ValidateFilter(f, false); err != nil { + return nil, err + } + filters = append(filters, f) + } + + return &Pipeline{filters: filters}, nil +} + +// Clean applies all filters in order (store direction). +// Safe to call on a nil *Pipeline (returns data unchanged). +func (p *Pipeline) Clean(data []byte) []byte { + if p == nil { + return data + } + for _, f := range p.filters { + data = f.Clean(data) + } + return data +} + +// Smudge applies all filters in reverse order (restore direction). +// Safe to call on a nil *Pipeline (returns data unchanged). +func (p *Pipeline) Smudge(data []byte) []byte { + if p == nil { + return data + } + for i := len(p.filters) - 1; i >= 0; i-- { + data = p.filters[i].Smudge(data) + } + return data +} + +// CleanString applies Clean to a string value. +// Safe to call on a nil *Pipeline (returns s unchanged). +func (p *Pipeline) CleanString(s string) string { + if p == nil { + return s + } + return string(p.Clean([]byte(s))) +} + +// SmudgeString applies Smudge to a string value. +// Safe to call on a nil *Pipeline (returns s unchanged). +func (p *Pipeline) SmudgeString(s string) string { + if p == nil { + return s + } + return string(p.Smudge([]byte(s))) +} diff --git a/cmd/entire/cli/filter/filter_test.go b/cmd/entire/cli/filter/filter_test.go new file mode 100644 index 000000000..ba2cf64dd --- /dev/null +++ b/cmd/entire/cli/filter/filter_test.go @@ -0,0 +1,176 @@ +package filter + +import ( + "testing" + + "github.com/entireio/cli/cmd/entire/cli/settings" +) + +func TestFilter_Clean(t *testing.T) { + t.Parallel() + f := Filter{Match: "/home/user/project", Replace: "__ent__/repo"} + got := f.Clean([]byte("file at /home/user/project/src/main.go")) + want := "file at __ent__/repo/src/main.go" + if string(got) != want { + t.Errorf("Clean() = %q, want %q", got, want) + } +} + +func TestFilter_Smudge(t *testing.T) { + t.Parallel() + f := Filter{Match: "/home/user/project", Replace: "__ent__/repo"} + got := f.Smudge([]byte("file at __ent__/repo/src/main.go")) + want := "file at /home/user/project/src/main.go" + if string(got) != want { + t.Errorf("Smudge() = %q, want %q", got, want) + } +} + +func TestFilter_RoundTrip(t *testing.T) { + t.Parallel() + f := Filter{Match: "/Users/soph/Work/repo", Replace: "__ent__/repo"} + original := []byte("editing /Users/soph/Work/repo/README.md and /Users/soph/Work/repo/go.mod") + cleaned := f.Clean(original) + restored := f.Smudge(cleaned) + if string(restored) != string(original) { + t.Errorf("round trip failed: got %q, want %q", restored, original) + } +} + +func TestPipeline_Clean_MostSpecificFirst(t *testing.T) { + t.Parallel() + // repoRoot is under homeDir — the more specific match must be applied first + p, err := NewPipeline("/home/user/project", "/home/user", nil) + if err != nil { + t.Fatal(err) + } + input := []byte("repo=/home/user/project home=/home/user") + got := string(p.Clean(input)) + want := "repo=__ent__/repo home=__ent__/home" + if got != want { + t.Errorf("Clean() = %q, want %q", got, want) + } +} + +func TestPipeline_Smudge_ReverseOrder(t *testing.T) { + t.Parallel() + p, err := NewPipeline("/home/user/project", "/home/user", nil) + if err != nil { + t.Fatal(err) + } + input := []byte("repo=__ent__/repo home=__ent__/home") + got := string(p.Smudge(input)) + want := "repo=/home/user/project home=/home/user" + if got != want { + t.Errorf("Smudge() = %q, want %q", got, want) + } +} + +func TestPipeline_RoundTrip(t *testing.T) { + t.Parallel() + p, err := NewPipeline("/home/user/project", "/home/user", nil) + if err != nil { + t.Fatal(err) + } + original := []byte("path=/home/user/project/src and home=/home/user/.config") + cleaned := p.Clean(original) + restored := p.Smudge(cleaned) + if string(restored) != string(original) { + t.Errorf("round trip failed:\n original: %q\n cleaned: %q\n restored: %q", original, cleaned, restored) + } +} + +func TestPipeline_UserFilters(t *testing.T) { + t.Parallel() + userFilters := []settings.TranscriptFilter{ + {Match: "acme-corp.internal", Key: "hostname"}, + } + p, err := NewPipeline("/home/user/project", "/home/user", userFilters) + if err != nil { + t.Fatal(err) + } + input := []byte("connecting to acme-corp.internal:8080") + got := string(p.Clean(input)) + want := "connecting to __ent_user__/hostname:8080" + if got != want { + t.Errorf("Clean() = %q, want %q", got, want) + } + + restored := string(p.Smudge([]byte(got))) + if restored != string(input) { + t.Errorf("Smudge() = %q, want %q", restored, input) + } +} + +func TestPipeline_EmptyPaths(t *testing.T) { + t.Parallel() + p, err := NewPipeline("", "", nil) + if err != nil { + t.Fatal(err) + } + input := []byte("nothing to filter here") + got := string(p.Clean(input)) + if got != string(input) { + t.Errorf("Clean() should be no-op with empty paths, got %q", got) + } +} + +func TestPipeline_CleanString(t *testing.T) { + t.Parallel() + p, err := NewPipeline("/home/user/project", "/home/user", nil) + if err != nil { + t.Fatal(err) + } + got := p.CleanString("path=/home/user/project/file.go") + want := "path=__ent__/repo/file.go" + if got != want { + t.Errorf("CleanString() = %q, want %q", got, want) + } +} + +func TestPipeline_SmudgeString(t *testing.T) { + t.Parallel() + p, err := NewPipeline("/home/user/project", "/home/user", nil) + if err != nil { + t.Fatal(err) + } + got := p.SmudgeString("path=__ent__/repo/file.go") + want := "path=/home/user/project/file.go" + if got != want { + t.Errorf("SmudgeString() = %q, want %q", got, want) + } +} + +func TestPipeline_NilSafe(t *testing.T) { + t.Parallel() + var p *Pipeline + input := []byte("should pass through unchanged") + if got := string(p.Clean(input)); got != string(input) { + t.Errorf("nil Clean() = %q, want %q", got, input) + } + if got := string(p.Smudge(input)); got != string(input) { + t.Errorf("nil Smudge() = %q, want %q", got, input) + } + if got := p.CleanString("test"); got != "test" { + t.Errorf("nil CleanString() = %q, want %q", got, "test") + } + if got := p.SmudgeString("test"); got != "test" { + t.Errorf("nil SmudgeString() = %q, want %q", got, "test") + } +} + +func TestPipeline_Idempotent(t *testing.T) { + t.Parallel() + p, err := NewPipeline("/home/user/project", "/home/user", nil) + if err != nil { + t.Fatal(err) + } + input := []byte("path=/home/user/project/src") + + // Cleaning twice should produce the same result + cleaned := p.Clean(input) + doubleCleaned := p.Clean(cleaned) + if string(cleaned) != string(doubleCleaned) { + t.Errorf("not idempotent:\n first: %q\n second: %q", cleaned, doubleCleaned) + } +} diff --git a/cmd/entire/cli/filter/validate.go b/cmd/entire/cli/filter/validate.go new file mode 100644 index 000000000..20108ace6 --- /dev/null +++ b/cmd/entire/cli/filter/validate.go @@ -0,0 +1,54 @@ +package filter + +import ( + "errors" + "fmt" + "strings" +) + +const ( + builtInPrefix = "__ent__/" + userPrefix = "__ent_user__/" + minMatchLen = 8 +) + +// ValidateFilter checks that a filter is well-formed. +// isBuiltIn distinguishes built-in filters (whose replacements start with __ent__/) +// from user filters (whose replacements must start with __ent_user__/). +func ValidateFilter(f Filter, isBuiltIn bool) error { + if f.Match == "" { + return errors.New("filter match must be non-empty") + } + if f.Replace == "" { + return errors.New("filter replace must be non-empty") + } + if f.Match == f.Replace { + return errors.New("filter match and replace must be different") + } + + // Idempotency: applying clean twice must produce the same result. + // This means the replacement string must not itself contain the match pattern, + // AND the match string must not contain the replacement pattern (which would + // cause smudge to corrupt the match). + if strings.Contains(f.Replace, f.Match) { + return fmt.Errorf("filter replace %q must not contain match %q (not idempotent)", f.Replace, f.Match) + } + if strings.Contains(f.Match, f.Replace) { + return fmt.Errorf("filter match %q must not contain replace %q (smudge would corrupt)", f.Match, f.Replace) + } + + if isBuiltIn { + if !strings.HasPrefix(f.Replace, builtInPrefix) { + return fmt.Errorf("built-in filter replace must start with %q, got %q", builtInPrefix, f.Replace) + } + } else { + if !strings.HasPrefix(f.Replace, userPrefix) { + return fmt.Errorf("user filter replace must start with %q, got %q", userPrefix, f.Replace) + } + if len(f.Match) < minMatchLen { + return fmt.Errorf("user filter match must be at least %d characters, got %d", minMatchLen, len(f.Match)) + } + } + + return nil +} diff --git a/cmd/entire/cli/filter/validate_test.go b/cmd/entire/cli/filter/validate_test.go new file mode 100644 index 000000000..bdf9aca1a --- /dev/null +++ b/cmd/entire/cli/filter/validate_test.go @@ -0,0 +1,85 @@ +package filter + +import ( + "testing" +) + +func TestValidateFilter_BuiltIn_Valid(t *testing.T) { + t.Parallel() + f := Filter{Match: "/home/user/project", Replace: "__ent__/repo"} + if err := ValidateFilter(f, true); err != nil { + t.Errorf("unexpected error: %v", err) + } +} + +func TestValidateFilter_BuiltIn_WrongPrefix(t *testing.T) { + t.Parallel() + f := Filter{Match: "/home/user/project", Replace: "__ent_user__/repo"} + if err := ValidateFilter(f, true); err == nil { + t.Error("expected error for wrong prefix on built-in filter") + } +} + +func TestValidateFilter_User_Valid(t *testing.T) { + t.Parallel() + f := Filter{Match: "acme-corp.internal", Replace: "__ent_user__/hostname"} + if err := ValidateFilter(f, false); err != nil { + t.Errorf("unexpected error: %v", err) + } +} + +func TestValidateFilter_User_WrongPrefix(t *testing.T) { + t.Parallel() + f := Filter{Match: "acme-corp.internal", Replace: "__ent__/hostname"} + if err := ValidateFilter(f, false); err == nil { + t.Error("expected error for wrong prefix on user filter") + } +} + +func TestValidateFilter_User_TooShort(t *testing.T) { + t.Parallel() + f := Filter{Match: "short", Replace: "__ent_user__/x"} + if err := ValidateFilter(f, false); err == nil { + t.Error("expected error for short match on user filter") + } +} + +func TestValidateFilter_EmptyMatch(t *testing.T) { + t.Parallel() + f := Filter{Match: "", Replace: "__ent__/repo"} + if err := ValidateFilter(f, true); err == nil { + t.Error("expected error for empty match") + } +} + +func TestValidateFilter_EmptyReplace(t *testing.T) { + t.Parallel() + f := Filter{Match: "/home/user", Replace: ""} + if err := ValidateFilter(f, true); err == nil { + t.Error("expected error for empty replace") + } +} + +func TestValidateFilter_SameMatchAndReplace(t *testing.T) { + t.Parallel() + f := Filter{Match: "__ent__/repo", Replace: "__ent__/repo"} + if err := ValidateFilter(f, true); err == nil { + t.Error("expected error for same match and replace") + } +} + +func TestValidateFilter_ReplaceContainsMatch(t *testing.T) { + t.Parallel() + f := Filter{Match: "foo", Replace: "__ent__/foo"} + if err := ValidateFilter(f, true); err == nil { + t.Error("expected error when replace contains match") + } +} + +func TestValidateFilter_MatchContainsReplace(t *testing.T) { + t.Parallel() + f := Filter{Match: "/home/__ent__/repo/project", Replace: "__ent__/repo"} + if err := ValidateFilter(f, true); err == nil { + t.Error("expected error when match contains replace") + } +} diff --git a/cmd/entire/cli/settings/settings.go b/cmd/entire/cli/settings/settings.go index 49f1895c9..d93c4b36c 100644 --- a/cmd/entire/cli/settings/settings.go +++ b/cmd/entire/cli/settings/settings.go @@ -71,11 +71,23 @@ type EntireSettings struct { // plugins (entire-agent-* binaries on $PATH). Defaults to false. ExternalAgents bool `json:"external_agents,omitempty"` + // TranscriptFilters defines user-supplied clean/smudge filters applied to + // transcripts before storage. Each entry's Match is replaced by + // __ent_user__/ on clean; the substitution is reversed on smudge. + TranscriptFilters []TranscriptFilter `json:"transcript_filters,omitempty"` + // Deprecated: no longer used. Exists to tolerate old settings files // that still contain "strategy": "auto-commit" or similar. Strategy string `json:"strategy,omitempty"` } +// TranscriptFilter is a user-configured find-and-replace pair for transcripts. +// Match is the literal string to find; Key becomes __ent_user__/ as the replacement. +type TranscriptFilter struct { + Match string `json:"match"` + Key string `json:"key"` +} + // RedactionSettings configures redaction behavior beyond the default secret detection. type RedactionSettings struct { PII *PIISettings `json:"pii,omitempty"` @@ -279,6 +291,15 @@ func mergeJSON(settings *EntireSettings, data []byte) error { } } + // Override transcript_filters if present (wholesale replace, not merge) + if tfRaw, ok := raw["transcript_filters"]; ok { + var tf []TranscriptFilter + if err := json.Unmarshal(tfRaw, &tf); err != nil { + return fmt.Errorf("parsing transcript_filters field: %w", err) + } + settings.TranscriptFilters = tf + } + // Override external_agents if present if externalAgentsRaw, ok := raw["external_agents"]; ok { var ea bool diff --git a/cmd/entire/cli/strategy/manual_commit_rewind.go b/cmd/entire/cli/strategy/manual_commit_rewind.go index d0b0ddb83..8e5811bd5 100644 --- a/cmd/entire/cli/strategy/manual_commit_rewind.go +++ b/cmd/entire/cli/strategy/manual_commit_rewind.go @@ -15,6 +15,7 @@ import ( "github.com/entireio/cli/cmd/entire/cli/agent/types" cpkg "github.com/entireio/cli/cmd/entire/cli/checkpoint" "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" + "github.com/entireio/cli/cmd/entire/cli/filter" "github.com/entireio/cli/cmd/entire/cli/osroot" "github.com/entireio/cli/cmd/entire/cli/paths" "github.com/entireio/cli/cmd/entire/cli/trailers" @@ -210,7 +211,7 @@ func (s *ManualCommitStrategy) GetLogsOnlyRewindPoints(ctx context.Context, limi var sessionPrompt string var sessionPrompts []string if metadataTree != nil { - checkpointPath := paths.CheckpointPath(cpInfo.CheckpointID) //nolint:staticcheck // already present in codebase + checkpointPath := paths.CheckpointPath(cpInfo.CheckpointID) // For multi-session checkpoints, read all prompts if cpInfo.SessionCount > 1 && len(cpInfo.SessionIDs) > 1 { sessionPrompts = ReadAllSessionPromptsFromTree(metadataTree, checkpointPath, cpInfo.SessionCount, cpInfo.SessionIDs) @@ -670,6 +671,9 @@ func (s *ManualCommitStrategy) RestoreLogsOnly(ctx context.Context, w, errW io.W fmt.Fprintf(w, "Restoring %d sessions from checkpoint:\n", totalSessions) } + // Construct filter pipeline once for all sessions (avoids re-reading settings per session) + pipeline := filter.FromContext(ctx) + // Restore all sessions (oldest to newest, using 0-based indexing) var restored []RestoredSession for i := range totalSessions { @@ -735,7 +739,7 @@ func (s *ManualCommitStrategy) RestoreLogsOnly(ctx context.Context, w, errW io.W AgentName: sessionAgent.Name(), RepoPath: repoRoot, SessionRef: sessionFile, - NativeData: content.Transcript, + NativeData: pipeline.Smudge(content.Transcript), } if writeErr := sessionAgent.WriteSession(ctx, agentSession); writeErr != nil { if totalSessions > 1 { From a820aeb6fbea396a936d7f6d413456b76378b2bc Mon Sep 17 00:00:00 2001 From: Stefan Haubold Date: Mon, 23 Mar 2026 15:16:30 +0100 Subject: [PATCH 2/6] add integration tests Entire-Checkpoint: 9d45e0e8b9e7 --- cmd/entire/cli/checkpoint/committed.go | 18 +- cmd/entire/cli/checkpoint/temporary.go | 7 +- .../cli/integration_test/filter_test.go | 176 ++++++++++++++++++ 3 files changed, 190 insertions(+), 11 deletions(-) create mode 100644 cmd/entire/cli/integration_test/filter_test.go diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index 60547ce90..27cb2e011 100644 --- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -354,7 +354,7 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom // Write prompts if len(opts.Prompts) > 0 { - promptContent := pipeline.CleanString(redact.String(strings.Join(opts.Prompts, "\n\n---\n\n"))) + promptContent := redact.String(pipeline.CleanString(strings.Join(opts.Prompts, "\n\n---\n\n"))) blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent)) if err != nil { return filePaths, err @@ -563,12 +563,13 @@ func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptio return nil } - // Redact secrets then normalize machine-specific paths + // Normalize machine-specific paths first (before redaction can alter them), + // then redact secrets. + transcript = pipeline.Clean(transcript) transcript, err := redact.JSONLBytes(transcript) if err != nil { return fmt.Errorf("failed to redact transcript secrets: %w", err) } - transcript = pipeline.Clean(transcript) // Chunk the transcript if it's too large chunks, err := agent.ChunkTranscript(ctx, transcript, opts.Agent) @@ -1185,11 +1186,11 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti // Replace transcript (full replace, not append) // Apply redaction as safety net (caller should redact, but we ensure it here) if len(opts.Transcript) > 0 { - transcript, err := redact.JSONLBytes(opts.Transcript) + transcript := pipeline.Clean(opts.Transcript) + transcript, err := redact.JSONLBytes(transcript) if err != nil { return fmt.Errorf("failed to redact transcript secrets: %w", err) } - transcript = pipeline.Clean(transcript) if err := s.replaceTranscript(ctx, transcript, opts.Agent, sessionPath, entries); err != nil { return fmt.Errorf("failed to replace transcript: %w", err) } @@ -1197,7 +1198,7 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti // Replace prompts (apply redaction as safety net) if len(opts.Prompts) > 0 { - promptContent := pipeline.CleanString(redact.String(strings.Join(opts.Prompts, "\n\n---\n\n"))) + promptContent := redact.String(pipeline.CleanString(strings.Join(opts.Prompts, "\n\n---\n\n"))) blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent)) if err != nil { return fmt.Errorf("failed to create prompt blob: %w", err) @@ -1466,6 +1467,9 @@ func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string, return hash, mode, nil } + // Normalize paths before redaction so redaction doesn't alter matchable paths. + content = pipeline.Clean(content) + if strings.HasSuffix(treePath, ".jsonl") { redacted, jsonlErr := redact.JSONLBytes(content) if jsonlErr != nil { @@ -1476,8 +1480,6 @@ func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string, content = redact.Bytes(content) } - content = pipeline.Clean(content) - hash, err := CreateBlobFromContent(repo, content) if err != nil { return plumbing.ZeroHash, 0, fmt.Errorf("failed to create blob: %w", err) diff --git a/cmd/entire/cli/checkpoint/temporary.go b/cmd/entire/cli/checkpoint/temporary.go index fd5acaf8f..315b9af42 100644 --- a/cmd/entire/cli/checkpoint/temporary.go +++ b/cmd/entire/cli/checkpoint/temporary.go @@ -313,11 +313,11 @@ func (s *GitStore) addTaskMetadataToTree(ctx context.Context, baseTreeHash plumb var incData []byte var err error if opts.IncrementalData != nil { - incData, err = redact.JSONLBytes(opts.IncrementalData) + incData = pipeline.Clean(opts.IncrementalData) + incData, err = redact.JSONLBytes(incData) if err != nil { return plumbing.ZeroHash, fmt.Errorf("failed to redact incremental checkpoint: %w", err) } - incData = pipeline.Clean(incData) } incrementalCheckpoint := struct { Type string `json:"type"` @@ -384,6 +384,7 @@ func (s *GitStore) addTaskMetadataToTree(ctx context.Context, baseTreeHash plumb // Add subagent transcript if available if opts.SubagentTranscriptPath != "" && opts.AgentID != "" { if agentContent, readErr := os.ReadFile(opts.SubagentTranscriptPath); readErr == nil { + agentContent = pipeline.Clean(agentContent) redacted, jsonlErr := redact.JSONLBytes(agentContent) if jsonlErr != nil { logging.Warn(ctx, "subagent transcript is not valid JSONL, falling back to plain redaction", @@ -392,7 +393,7 @@ func (s *GitStore) addTaskMetadataToTree(ctx context.Context, baseTreeHash plumb ) redacted = redact.Bytes(agentContent) } - agentContent = pipeline.Clean(redacted) + agentContent = redacted if blobHash, blobErr := CreateBlobFromContent(s.repo, agentContent); blobErr == nil { agentPath := taskMetadataDir + "/agent-" + opts.AgentID + ".jsonl" changes = append(changes, TreeChange{ diff --git a/cmd/entire/cli/integration_test/filter_test.go b/cmd/entire/cli/integration_test/filter_test.go new file mode 100644 index 000000000..a8812473f --- /dev/null +++ b/cmd/entire/cli/integration_test/filter_test.go @@ -0,0 +1,176 @@ +//go:build integration + +package integration + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/entireio/cli/cmd/entire/cli/paths" +) + +// TestFilter_CleanRepoPathsInTranscript verifies that absolute repo root paths +// in transcripts are normalized to __ent__/repo on the metadata branch after +// a checkpoint is condensed. +func TestFilter_CleanRepoPathsInTranscript(t *testing.T) { + t.Parallel() + env := NewFeatureBranchEnv(t) + session := env.NewSession() + + // Simulate a prompt that mentions the repo root path + if err := env.SimulateUserPromptSubmitWithPrompt(session.ID, + "Edit the file at "+env.RepoDir+"/hello.go"); err != nil { + t.Fatalf("SimulateUserPromptSubmit failed: %v", err) + } + + // Build a transcript that contains the repo root path + env.WriteFile("hello.go", "package main\nfunc main() {}") + session.TranscriptBuilder.AddUserMessage("Edit " + env.RepoDir + "/hello.go") + session.TranscriptBuilder.AddAssistantMessage("I'll edit " + env.RepoDir + "/hello.go for you.") + toolID := session.TranscriptBuilder.AddToolUse("mcp__acp__Write", env.RepoDir+"/hello.go", "package main\nfunc main() {}") + session.TranscriptBuilder.AddToolResult(toolID) + session.TranscriptBuilder.AddAssistantMessage("Done!") + if err := session.TranscriptBuilder.WriteToFile(session.TranscriptPath); err != nil { + t.Fatalf("failed to write transcript: %v", err) + } + + if err := env.SimulateStop(session.ID, session.TranscriptPath); err != nil { + t.Fatalf("SimulateStop failed: %v", err) + } + + // Commit with shadow hooks to trigger condensation + env.GitCommitWithShadowHooks("Add hello.go", "hello.go") + + // Read transcript from the metadata branch + checkpointID := env.GetLatestCheckpointID() + transcriptPath := checkpointID[:2] + "/" + checkpointID[2:] + "/0/" + paths.TranscriptFileName + transcriptContent, found := env.ReadFileFromBranch(paths.MetadataBranchName, transcriptPath) + if !found { + t.Fatalf("transcript not found on %s at %s", paths.MetadataBranchName, transcriptPath) + } + + // Verify repo root is replaced with __ent__/repo + if strings.Contains(transcriptContent, env.RepoDir) { + t.Errorf("transcript on metadata branch should not contain repo root %q, but does:\n%s", + env.RepoDir, transcriptContent[:min(500, len(transcriptContent))]) + } + if !strings.Contains(transcriptContent, "__ent__/repo") { + t.Errorf("transcript on metadata branch should contain __ent__/repo placeholder, but doesn't:\n%s", + transcriptContent[:min(500, len(transcriptContent))]) + } +} + +// TestFilter_CleanPromptsOnMetadataBranch verifies that prompts stored on the +// metadata branch have absolute paths normalized. +func TestFilter_CleanPromptsOnMetadataBranch(t *testing.T) { + t.Parallel() + env := NewFeatureBranchEnv(t) + session := env.NewSession() + + env.WriteFile("app.go", "package main\n// fixed") + + // Submit a prompt containing the repo root path + prompt := "Please fix the bug in " + env.RepoDir + "/app.go" + if err := env.SimulateUserPromptSubmitWithPrompt(session.ID, prompt); err != nil { + t.Fatalf("SimulateUserPromptSubmit failed: %v", err) + } + + session.CreateTranscript(prompt, []FileChange{{Path: "app.go", Content: "package main\n// fixed"}}) + if err := env.SimulateStop(session.ID, session.TranscriptPath); err != nil { + t.Fatalf("SimulateStop failed: %v", err) + } + + // Commit with shadow hooks to trigger condensation + env.GitCommitWithShadowHooks("Fix app.go", "app.go") + + // Read prompt from the metadata branch + checkpointID := env.GetLatestCheckpointID() + promptPath := checkpointID[:2] + "/" + checkpointID[2:] + "/0/" + paths.PromptFileName + promptContent, found := env.ReadFileFromBranch(paths.MetadataBranchName, promptPath) + if !found { + t.Fatalf("prompt not found on %s at %s", paths.MetadataBranchName, promptPath) + } + + if strings.Contains(promptContent, env.RepoDir) { + t.Errorf("prompt should not contain repo root %q, got: %s", env.RepoDir, promptContent) + } + if !strings.Contains(promptContent, "__ent__/repo") { + t.Errorf("prompt should contain __ent__/repo, got: %s", promptContent) + } +} + +// TestFilter_SmudgeRestoresPathsOnLogsOnlyRewind verifies that when doing a +// logs-only rewind (from metadata branch), the transcript written to the agent's +// session file has __ent__/repo replaced back with the actual repo root path. +func TestFilter_SmudgeRestoresPathsOnLogsOnlyRewind(t *testing.T) { + t.Parallel() + env := NewFeatureBranchEnv(t) + session := env.NewSession() + + env.WriteFile("main.go", "v1") + + if err := env.SimulateUserPromptSubmit(session.ID); err != nil { + t.Fatalf("SimulateUserPromptSubmit failed: %v", err) + } + + // Build transcript with repo root paths + session.TranscriptBuilder.AddUserMessage("Edit " + env.RepoDir + "/main.go") + session.TranscriptBuilder.AddAssistantMessage("Editing " + env.RepoDir + "/main.go") + toolID := session.TranscriptBuilder.AddToolUse("mcp__acp__Write", env.RepoDir+"/main.go", "v1") + session.TranscriptBuilder.AddToolResult(toolID) + if err := session.TranscriptBuilder.WriteToFile(session.TranscriptPath); err != nil { + t.Fatalf("failed to write transcript: %v", err) + } + + if err := env.SimulateStop(session.ID, session.TranscriptPath); err != nil { + t.Fatalf("SimulateStop failed: %v", err) + } + + // Commit to condensate checkpoint to metadata branch + env.GitCommitWithShadowHooks("Add main.go", "main.go") + + // Delete the local transcript so we can verify it's restored from the metadata branch + localTranscript := filepath.Join(env.ClaudeProjectDir, session.ID+".jsonl") + os.Remove(localTranscript) + + // Make another commit so the checkpoint becomes a logs-only rewind point + env.WriteFile("extra.go", "package main") + env.GitCommitWithShadowHooks("Add extra.go", "extra.go") + + // Get rewind points and find the logs-only one + points := env.GetRewindPoints() + var logsOnlyPoint *RewindPoint + for i := range points { + if points[i].IsLogsOnly { + logsOnlyPoint = &points[i] + break + } + } + if logsOnlyPoint == nil { + t.Fatal("expected a logs-only rewind point after additional commit") + } + + // Rewind logs-only to restore transcript from metadata branch + if err := env.RewindLogsOnly(logsOnlyPoint.ID); err != nil { + t.Fatalf("RewindLogsOnly failed: %v", err) + } + + // Read the restored transcript + restoredData, err := os.ReadFile(localTranscript) + if err != nil { + t.Fatalf("failed to read restored transcript at %s: %v", localTranscript, err) + } + restoredContent := string(restoredData) + + // Verify real paths are restored (smudge applied) + if strings.Contains(restoredContent, "__ent__/repo") { + t.Errorf("restored transcript should not contain __ent__/repo placeholder, but does:\n%s", + restoredContent[:min(500, len(restoredContent))]) + } + if !strings.Contains(restoredContent, env.RepoDir) { + t.Errorf("restored transcript should contain actual repo root %q, but doesn't:\n%s", + env.RepoDir, restoredContent[:min(500, len(restoredContent))]) + } +} From 78effa7fd5549af4c3bf0b51dc36bfcce662e2b0 Mon Sep 17 00:00:00 2001 From: Stefan Haubold Date: Mon, 23 Mar 2026 16:57:09 +0100 Subject: [PATCH 3/6] fixed some review findings Entire-Checkpoint: d9e97437b51f --- cmd/entire/cli/checkpoint/temporary.go | 12 +++++--- .../cli/checkpoint/tree_surgery_equiv_test.go | 2 +- cmd/entire/cli/filter/filter.go | 4 +++ cmd/entire/cli/filter/filter_test.go | 23 +++++++++++++++ cmd/entire/cli/filter/validate.go | 16 +++++++++++ cmd/entire/cli/filter/validate_test.go | 28 +++++++++++++++++++ .../cli/strategy/manual_commit_rewind.go | 2 +- 7 files changed, 81 insertions(+), 6 deletions(-) diff --git a/cmd/entire/cli/checkpoint/temporary.go b/cmd/entire/cli/checkpoint/temporary.go index 315b9af42..6028e2ea2 100644 --- a/cmd/entire/cli/checkpoint/temporary.go +++ b/cmd/entire/cli/checkpoint/temporary.go @@ -107,8 +107,11 @@ func (s *GitStore) WriteTemporary(ctx context.Context, opts WriteTemporaryOption allDeletedFiles = opts.DeletedFiles } + // Construct filter pipeline once for the entire write + pipeline := filter.FromContext(ctx) + // Build tree with changes - treeHash, err := s.buildTreeWithChanges(ctx, baseTreeHash, allFiles, allDeletedFiles, opts.MetadataDir, opts.MetadataDirAbs) + treeHash, err := s.buildTreeWithChanges(ctx, baseTreeHash, allFiles, allDeletedFiles, opts.MetadataDir, opts.MetadataDirAbs, pipeline) if err != nil { return WriteTemporaryResult{}, fmt.Errorf("failed to build tree: %w", err) } @@ -265,8 +268,8 @@ func (s *GitStore) WriteTemporaryTask(ctx context.Context, opts WriteTemporaryTa allFiles = append(allFiles, opts.ModifiedFiles...) allFiles = append(allFiles, opts.NewFiles...) - // Build new tree with code changes (no metadata dir yet) - newTreeHash, err := s.buildTreeWithChanges(ctx, baseTreeHash, allFiles, opts.DeletedFiles, "", "") + // Build new tree with code changes (no metadata dir yet; nil pipeline — task checkpoints have no metadata dir) + newTreeHash, err := s.buildTreeWithChanges(ctx, baseTreeHash, allFiles, opts.DeletedFiles, "", "", nil) if err != nil { return plumbing.ZeroHash, fmt.Errorf("failed to build tree: %w", err) } @@ -716,6 +719,7 @@ func (s *GitStore) buildTreeWithChanges( baseTreeHash plumbing.Hash, modifiedFiles, deletedFiles []string, metadataDir, metadataDirAbs string, + pipeline *filter.Pipeline, ) (plumbing.Hash, error) { // Get worktree root for resolving file paths // This is critical because fileExists() and createBlobFromFile() use os.Stat() @@ -760,7 +764,7 @@ func (s *GitStore) buildTreeWithChanges( // Metadata directory files if metadataDir != "" && metadataDirAbs != "" { - metaChanges, metaErr := addDirectoryToChanges(s.repo, metadataDirAbs, metadataDir, filter.FromContext(ctx)) + metaChanges, metaErr := addDirectoryToChanges(s.repo, metadataDirAbs, metadataDir, pipeline) if metaErr != nil { return plumbing.ZeroHash, fmt.Errorf("failed to add metadata directory: %w", metaErr) } diff --git a/cmd/entire/cli/checkpoint/tree_surgery_equiv_test.go b/cmd/entire/cli/checkpoint/tree_surgery_equiv_test.go index 481c92e37..2df2d7c32 100644 --- a/cmd/entire/cli/checkpoint/tree_surgery_equiv_test.go +++ b/cmd/entire/cli/checkpoint/tree_surgery_equiv_test.go @@ -57,7 +57,7 @@ func TestBuildTreeWithChanges_EquivalenceWithFlattenRebuild(t *testing.T) { //no t.Chdir(dir) // --- New approach: ApplyTreeChanges (what buildTreeWithChanges now does) --- - newHash, err := store.buildTreeWithChanges(context.Background(), baseTreeHash, modifiedFiles, deletedFiles, metadataDir, metadataDirAbs) + newHash, err := store.buildTreeWithChanges(context.Background(), baseTreeHash, modifiedFiles, deletedFiles, metadataDir, metadataDirAbs, nil) if err != nil { t.Fatalf("buildTreeWithChanges (new): %v", err) } diff --git a/cmd/entire/cli/filter/filter.go b/cmd/entire/cli/filter/filter.go index d3bffe1c9..1b7009f3b 100644 --- a/cmd/entire/cli/filter/filter.go +++ b/cmd/entire/cli/filter/filter.go @@ -6,6 +6,7 @@ package filter import ( "bytes" + "fmt" "github.com/entireio/cli/cmd/entire/cli/settings" ) @@ -61,6 +62,9 @@ func NewPipeline(repoRoot, homeDir string, userFilters []settings.TranscriptFilt // User filters for _, uf := range userFilters { + if err := validateUserFilterKey(uf.Key); err != nil { + return nil, fmt.Errorf("invalid transcript filter key %q: %w", uf.Key, err) + } f := Filter{ Match: uf.Match, Replace: "__ent_user__/" + uf.Key, diff --git a/cmd/entire/cli/filter/filter_test.go b/cmd/entire/cli/filter/filter_test.go index ba2cf64dd..9df8a9ed9 100644 --- a/cmd/entire/cli/filter/filter_test.go +++ b/cmd/entire/cli/filter/filter_test.go @@ -159,6 +159,29 @@ func TestPipeline_NilSafe(t *testing.T) { } } +func TestNewPipeline_RejectsInvalidKey(t *testing.T) { + t.Parallel() + tests := []struct { + name string + key string + }{ + {name: "empty key", key: ""}, + {name: "key with slash", key: "foo/bar"}, + {name: "key with __ent prefix", key: "__ent_reserved"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + _, err := NewPipeline("/home/user/project", "/home/user", []settings.TranscriptFilter{ + {Match: "long-enough-match", Key: tt.key}, + }) + if err == nil { + t.Errorf("expected NewPipeline to reject key %q", tt.key) + } + }) + } +} + func TestPipeline_Idempotent(t *testing.T) { t.Parallel() p, err := NewPipeline("/home/user/project", "/home/user", nil) diff --git a/cmd/entire/cli/filter/validate.go b/cmd/entire/cli/filter/validate.go index 20108ace6..7eb96652d 100644 --- a/cmd/entire/cli/filter/validate.go +++ b/cmd/entire/cli/filter/validate.go @@ -52,3 +52,19 @@ func ValidateFilter(f Filter, isBuiltIn bool) error { return nil } + +// validateUserFilterKey checks that a user-supplied filter key is safe to use +// as a replacement token suffix. Rejects empty keys, keys containing path +// separators, and keys that could collide with built-in marker prefixes. +func validateUserFilterKey(key string) error { + if key == "" { + return errors.New("must be non-empty") + } + if strings.ContainsAny(key, "/\\") { + return errors.New("must not contain path separators") + } + if strings.HasPrefix(key, "__ent") { + return errors.New("must not start with reserved prefix \"__ent\"") + } + return nil +} diff --git a/cmd/entire/cli/filter/validate_test.go b/cmd/entire/cli/filter/validate_test.go index bdf9aca1a..d9897729c 100644 --- a/cmd/entire/cli/filter/validate_test.go +++ b/cmd/entire/cli/filter/validate_test.go @@ -83,3 +83,31 @@ func TestValidateFilter_MatchContainsReplace(t *testing.T) { t.Error("expected error when match contains replace") } } + +func TestValidateUserFilterKey(t *testing.T) { + t.Parallel() + tests := []struct { + name string + key string + wantErr bool + }{ + {name: "valid key", key: "hostname", wantErr: false}, + {name: "empty key", key: "", wantErr: true}, + {name: "key with slash", key: "foo/bar", wantErr: true}, + {name: "key with backslash", key: "foo\\bar", wantErr: true}, + {name: "key with __ent prefix", key: "__ent_something", wantErr: true}, + {name: "key with __ent__ prefix", key: "__ent__repo", wantErr: true}, + {name: "key that is just __ent", key: "__ent", wantErr: true}, + {name: "key starting with underscore", key: "_valid", wantErr: false}, + {name: "hyphenated key", key: "my-hostname", wantErr: false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + err := validateUserFilterKey(tt.key) + if (err != nil) != tt.wantErr { + t.Errorf("validateUserFilterKey(%q) error = %v, wantErr %v", tt.key, err, tt.wantErr) + } + }) + } +} diff --git a/cmd/entire/cli/strategy/manual_commit_rewind.go b/cmd/entire/cli/strategy/manual_commit_rewind.go index 8e5811bd5..cf8738fee 100644 --- a/cmd/entire/cli/strategy/manual_commit_rewind.go +++ b/cmd/entire/cli/strategy/manual_commit_rewind.go @@ -211,7 +211,7 @@ func (s *ManualCommitStrategy) GetLogsOnlyRewindPoints(ctx context.Context, limi var sessionPrompt string var sessionPrompts []string if metadataTree != nil { - checkpointPath := paths.CheckpointPath(cpInfo.CheckpointID) + checkpointPath := paths.CheckpointPath(cpInfo.CheckpointID) //nolint:staticcheck // already present in codebase // For multi-session checkpoints, read all prompts if cpInfo.SessionCount > 1 && len(cpInfo.SessionIDs) > 1 { sessionPrompts = ReadAllSessionPromptsFromTree(metadataTree, checkpointPath, cpInfo.SessionCount, cpInfo.SessionIDs) From f867e906eb783b3b314e9e549d70f461eb14f908 Mon Sep 17 00:00:00 2001 From: Stefan Haubold Date: Mon, 23 Mar 2026 20:14:45 +0100 Subject: [PATCH 4/6] add min length for replacements Entire-Checkpoint: 659b29339900 --- cmd/entire/cli/filter/filter_test.go | 12 +++++++++++ cmd/entire/cli/filter/validate.go | 14 ++++++++----- cmd/entire/cli/filter/validate_test.go | 20 +++++++++++++++++++ .../cli/strategy/manual_commit_rewind.go | 2 +- 4 files changed, 42 insertions(+), 6 deletions(-) diff --git a/cmd/entire/cli/filter/filter_test.go b/cmd/entire/cli/filter/filter_test.go index 9df8a9ed9..e5aade170 100644 --- a/cmd/entire/cli/filter/filter_test.go +++ b/cmd/entire/cli/filter/filter_test.go @@ -182,6 +182,18 @@ func TestNewPipeline_RejectsInvalidKey(t *testing.T) { } } +func TestNewPipeline_RejectsShortBuiltInPaths(t *testing.T) { + t.Parallel() + _, err := NewPipeline("/", "/home/user", nil) + if err == nil { + t.Error("expected NewPipeline to reject repo root \"/\"") + } + _, err = NewPipeline("/ab", "/home/user", nil) + if err == nil { + t.Error("expected NewPipeline to reject repo root \"/ab\"") + } +} + func TestPipeline_Idempotent(t *testing.T) { t.Parallel() p, err := NewPipeline("/home/user/project", "/home/user", nil) diff --git a/cmd/entire/cli/filter/validate.go b/cmd/entire/cli/filter/validate.go index 7eb96652d..48853ddfc 100644 --- a/cmd/entire/cli/filter/validate.go +++ b/cmd/entire/cli/filter/validate.go @@ -7,9 +7,10 @@ import ( ) const ( - builtInPrefix = "__ent__/" - userPrefix = "__ent_user__/" - minMatchLen = 8 + builtInPrefix = "__ent__/" + userPrefix = "__ent_user__/" + minBuiltInMatchLen = 4 + minUserMatchLen = 8 ) // ValidateFilter checks that a filter is well-formed. @@ -41,12 +42,15 @@ func ValidateFilter(f Filter, isBuiltIn bool) error { if !strings.HasPrefix(f.Replace, builtInPrefix) { return fmt.Errorf("built-in filter replace must start with %q, got %q", builtInPrefix, f.Replace) } + if len(f.Match) < minBuiltInMatchLen { + return fmt.Errorf("built-in filter match must be at least %d characters, got %d", minBuiltInMatchLen, len(f.Match)) + } } else { if !strings.HasPrefix(f.Replace, userPrefix) { return fmt.Errorf("user filter replace must start with %q, got %q", userPrefix, f.Replace) } - if len(f.Match) < minMatchLen { - return fmt.Errorf("user filter match must be at least %d characters, got %d", minMatchLen, len(f.Match)) + if len(f.Match) < minUserMatchLen { + return fmt.Errorf("user filter match must be at least %d characters, got %d", minUserMatchLen, len(f.Match)) } } diff --git a/cmd/entire/cli/filter/validate_test.go b/cmd/entire/cli/filter/validate_test.go index d9897729c..83f898890 100644 --- a/cmd/entire/cli/filter/validate_test.go +++ b/cmd/entire/cli/filter/validate_test.go @@ -44,6 +44,26 @@ func TestValidateFilter_User_TooShort(t *testing.T) { } } +func TestValidateFilter_BuiltIn_TooShort(t *testing.T) { + t.Parallel() + f := Filter{Match: "/", Replace: "__ent__/repo"} + if err := ValidateFilter(f, true); err == nil { + t.Error("expected error for short match on built-in filter") + } + f2 := Filter{Match: "/ab", Replace: "__ent__/repo"} + if err := ValidateFilter(f2, true); err == nil { + t.Error("expected error for 3-char match on built-in filter") + } +} + +func TestValidateFilter_BuiltIn_MinLength(t *testing.T) { + t.Parallel() + f := Filter{Match: "/abc", Replace: "__ent__/repo"} + if err := ValidateFilter(f, true); err != nil { + t.Errorf("unexpected error for 4-char built-in match: %v", err) + } +} + func TestValidateFilter_EmptyMatch(t *testing.T) { t.Parallel() f := Filter{Match: "", Replace: "__ent__/repo"} diff --git a/cmd/entire/cli/strategy/manual_commit_rewind.go b/cmd/entire/cli/strategy/manual_commit_rewind.go index cf8738fee..8e5811bd5 100644 --- a/cmd/entire/cli/strategy/manual_commit_rewind.go +++ b/cmd/entire/cli/strategy/manual_commit_rewind.go @@ -211,7 +211,7 @@ func (s *ManualCommitStrategy) GetLogsOnlyRewindPoints(ctx context.Context, limi var sessionPrompt string var sessionPrompts []string if metadataTree != nil { - checkpointPath := paths.CheckpointPath(cpInfo.CheckpointID) //nolint:staticcheck // already present in codebase + checkpointPath := paths.CheckpointPath(cpInfo.CheckpointID) // For multi-session checkpoints, read all prompts if cpInfo.SessionCount > 1 && len(cpInfo.SessionIDs) > 1 { sessionPrompts = ReadAllSessionPromptsFromTree(metadataTree, checkpointPath, cpInfo.SessionCount, cpInfo.SessionIDs) From d171ba529d813bf37bbdd825fa38a73f9aaa9152 Mon Sep 17 00:00:00 2001 From: Stefan Haubold Date: Mon, 23 Mar 2026 20:36:35 +0100 Subject: [PATCH 5/6] run base filters even if settings loading fails Entire-Checkpoint: 2aec67f9083b --- cmd/entire/cli/filter/context.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cmd/entire/cli/filter/context.go b/cmd/entire/cli/filter/context.go index a1406a218..84e9286ff 100644 --- a/cmd/entire/cli/filter/context.go +++ b/cmd/entire/cli/filter/context.go @@ -28,14 +28,18 @@ func FromContext(ctx context.Context) *Pipeline { return nil } + var userFilters []settings.TranscriptFilter s, err := settings.Load(ctx) if err != nil { - logging.Warn(ctx, "filter: failed to load settings, transcript filtering disabled", + logging.Warn(ctx, "filter: failed to load settings, user transcript filters unavailable", slog.String("error", err.Error())) - return nil + // Continue with built-in filters only — repo root and home dir + // normalization still works even when settings are broken. + } else { + userFilters = s.TranscriptFilters } - p, err := NewPipeline(repoRoot, homeDir, s.TranscriptFilters) + p, err := NewPipeline(repoRoot, homeDir, userFilters) if err != nil { logging.Warn(ctx, "filter: failed to build pipeline, transcript filtering disabled", slog.String("error", err.Error())) From 5f35a1662f0a67706a517dc5e18cb442bc28a7ed Mon Sep 17 00:00:00 2001 From: Stefan Haubold Date: Mon, 23 Mar 2026 21:31:29 +0100 Subject: [PATCH 6/6] handle reverting filtering when displaying data Entire-Checkpoint: 4cb30309e49e --- cmd/entire/cli/checkpoint/checkpoint.go | 2 +- cmd/entire/cli/checkpoint/committed.go | 11 ++- cmd/entire/cli/checkpoint/display.go | 74 +++++++++++++++++++ cmd/entire/cli/checkpoint/temporary.go | 2 - cmd/entire/cli/explain.go | 10 +-- cmd/entire/cli/filter/filter.go | 6 -- cmd/entire/cli/resume.go | 2 +- cmd/entire/cli/rewind.go | 4 +- .../cli/strategy/manual_commit_rewind.go | 9 ++- 9 files changed, 96 insertions(+), 24 deletions(-) create mode 100644 cmd/entire/cli/checkpoint/display.go diff --git a/cmd/entire/cli/checkpoint/checkpoint.go b/cmd/entire/cli/checkpoint/checkpoint.go index 40133fbcf..4dd9470b9 100644 --- a/cmd/entire/cli/checkpoint/checkpoint.go +++ b/cmd/entire/cli/checkpoint/checkpoint.go @@ -93,7 +93,7 @@ type Store interface { // ReadSessionContent reads the actual content for a specific session within a checkpoint. // sessionIndex is 0-based (0 for first session, 1 for second, etc.). - // Returns the session's metadata, transcript, and prompts. + // Returns the session's metadata, transcript, and prompts in raw (stored) form. ReadSessionContent(ctx context.Context, checkpointID id.CheckpointID, sessionIndex int) (*SessionContent, error) // ReadSessionContentByID reads a session's content by its session ID. diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index 27cb2e011..c14459f74 100644 --- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -334,7 +334,6 @@ func (s *GitStore) writeStandardCheckpointEntries(ctx context.Context, opts Writ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCommittedOptions, sessionPath string, entries map[string]object.TreeEntry) (SessionFilePaths, error) { filePaths := SessionFilePaths{} - // Construct filter pipeline once for both transcript and prompts pipeline := filter.FromContext(ctx) // Clear any existing entries at this path so stale files from a previous @@ -354,7 +353,7 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom // Write prompts if len(opts.Prompts) > 0 { - promptContent := redact.String(pipeline.CleanString(strings.Join(opts.Prompts, "\n\n---\n\n"))) + promptContent := cleanAndRedactPrompts(pipeline, opts.Prompts) blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent)) if err != nil { return filePaths, err @@ -546,6 +545,11 @@ func aggregateTokenUsage(a, b *agent.TokenUsage) *agent.TokenUsage { return result } +// cleanAndRedactPrompts joins, filters, and redacts prompts for storage. +func cleanAndRedactPrompts(pipeline *filter.Pipeline, prompts []string) string { + return redact.String(pipeline.CleanString(strings.Join(prompts, "\n\n---\n\n"))) +} + // writeTranscript writes the transcript file from in-memory content or file path. // If the transcript exceeds MaxChunkSize, it's split into multiple chunk files. // pipeline may be nil (no-op filtering). @@ -1122,7 +1126,6 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti return errors.New("invalid update options: checkpoint ID is required") } - // Construct filter pipeline once for both transcript and prompt updates pipeline := filter.FromContext(ctx) // Ensure sessions branch exists @@ -1198,7 +1201,7 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti // Replace prompts (apply redaction as safety net) if len(opts.Prompts) > 0 { - promptContent := redact.String(pipeline.CleanString(strings.Join(opts.Prompts, "\n\n---\n\n"))) + promptContent := cleanAndRedactPrompts(pipeline, opts.Prompts) blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent)) if err != nil { return fmt.Errorf("failed to create prompt blob: %w", err) diff --git a/cmd/entire/cli/checkpoint/display.go b/cmd/entire/cli/checkpoint/display.go new file mode 100644 index 000000000..39b83af0e --- /dev/null +++ b/cmd/entire/cli/checkpoint/display.go @@ -0,0 +1,74 @@ +package checkpoint + +import ( + "context" + + "github.com/entireio/cli/cmd/entire/cli/agent/types" + "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" + "github.com/entireio/cli/cmd/entire/cli/filter" + + "github.com/go-git/go-git/v6/plumbing" +) + +// SmudgeSessionContent applies the smudge filter to transcript and prompt +// fields of a SessionContent in place. Safe to call with nil content or pipeline. +func SmudgeSessionContent(content *SessionContent, pipeline *filter.Pipeline) { + if content == nil || pipeline == nil { + return + } + content.Transcript = pipeline.Smudge(content.Transcript) + content.Prompts = pipeline.SmudgeString(content.Prompts) +} + +// ReadSessionContentForDisplay reads a session's content and applies the smudge +// filter so stored placeholders are replaced with machine-specific paths. +// Use this for user-facing output; use ReadSessionContent for internal operations. +func (s *GitStore) ReadSessionContentForDisplay(ctx context.Context, checkpointID id.CheckpointID, sessionIndex int) (*SessionContent, error) { + content, err := s.ReadSessionContent(ctx, checkpointID, sessionIndex) + if err != nil { + return nil, err + } + SmudgeSessionContent(content, filter.FromContext(ctx)) + return content, nil +} + +// ReadLatestSessionContentForDisplay is the display variant of ReadLatestSessionContent. +func (s *GitStore) ReadLatestSessionContentForDisplay(ctx context.Context, checkpointID id.CheckpointID) (*SessionContent, error) { + content, err := s.ReadLatestSessionContent(ctx, checkpointID) + if err != nil { + return nil, err + } + SmudgeSessionContent(content, filter.FromContext(ctx)) + return content, nil +} + +// GetSessionLogForDisplay is the display variant of GetSessionLog. +func (s *GitStore) GetSessionLogForDisplay(ctx context.Context, cpID id.CheckpointID) ([]byte, string, error) { + transcript, sessionID, err := s.GetSessionLog(ctx, cpID) + if err != nil { + return nil, "", err + } + pipeline := filter.FromContext(ctx) + return pipeline.Smudge(transcript), sessionID, nil +} + +// LookupSessionLogForDisplay is a convenience function that opens the repository +// and retrieves a smudged session log by checkpoint ID. +func LookupSessionLogForDisplay(ctx context.Context, cpID id.CheckpointID) ([]byte, string, error) { + transcript, sessionID, err := LookupSessionLog(ctx, cpID) + if err != nil { + return nil, "", err + } + pipeline := filter.FromContext(ctx) + return pipeline.Smudge(transcript), sessionID, nil +} + +// GetTranscriptFromCommitForDisplay is the display variant of GetTranscriptFromCommit. +func (s *GitStore) GetTranscriptFromCommitForDisplay(ctx context.Context, commitHash plumbing.Hash, metadataDir string, agentType types.AgentType) ([]byte, error) { + transcript, err := s.GetTranscriptFromCommit(ctx, commitHash, metadataDir, agentType) + if err != nil { + return nil, err + } + pipeline := filter.FromContext(ctx) + return pipeline.Smudge(transcript), nil +} diff --git a/cmd/entire/cli/checkpoint/temporary.go b/cmd/entire/cli/checkpoint/temporary.go index 6028e2ea2..96c09eb6d 100644 --- a/cmd/entire/cli/checkpoint/temporary.go +++ b/cmd/entire/cli/checkpoint/temporary.go @@ -107,7 +107,6 @@ func (s *GitStore) WriteTemporary(ctx context.Context, opts WriteTemporaryOption allDeletedFiles = opts.DeletedFiles } - // Construct filter pipeline once for the entire write pipeline := filter.FromContext(ctx) // Build tree with changes @@ -306,7 +305,6 @@ func (s *GitStore) addTaskMetadataToTree(ctx context.Context, baseTreeHash plumb sessionMetadataDir := paths.EntireMetadataDir + "/" + opts.SessionID taskMetadataDir := sessionMetadataDir + "/tasks/" + opts.ToolUseID - // Construct filter pipeline once for all data in this function pipeline := filter.FromContext(ctx) var changes []TreeChange diff --git a/cmd/entire/cli/explain.go b/cmd/entire/cli/explain.go index 4e8e880a9..174c5df50 100644 --- a/cmd/entire/cli/explain.go +++ b/cmd/entire/cli/explain.go @@ -258,7 +258,7 @@ func runExplainCheckpoint(ctx context.Context, w, errW io.Writer, checkpointIDPr } // Load latest session content (needed for transcript and metadata) - content, err := store.ReadLatestSessionContent(ctx, fullCheckpointID) + content, err := store.ReadLatestSessionContentForDisplay(ctx, fullCheckpointID) if err != nil { return fmt.Errorf("failed to read checkpoint content: %w", err) } @@ -269,7 +269,7 @@ func runExplainCheckpoint(ctx context.Context, w, errW io.Writer, checkpointIDPr return err } // Reload the content to get the updated summary - content, err = store.ReadLatestSessionContent(ctx, fullCheckpointID) + content, err = store.ReadLatestSessionContentForDisplay(ctx, fullCheckpointID) if err != nil { return fmt.Errorf("failed to reload checkpoint: %w", err) } @@ -404,7 +404,7 @@ func explainTemporaryCheckpoint(ctx context.Context, w io.Writer, repo *git.Repo // Handle raw transcript output if rawTranscript { - transcriptBytes, transcriptErr := store.GetTranscriptFromCommit(ctx, tc.CommitHash, tc.MetadataDir, agentType) + transcriptBytes, transcriptErr := store.GetTranscriptFromCommitForDisplay(ctx, tc.CommitHash, tc.MetadataDir, agentType) if transcriptErr != nil || len(transcriptBytes) == 0 { // Return specific error message (consistent with committed checkpoints) return fmt.Sprintf("checkpoint %s has no transcript", tc.CommitHash.String()[:7]), false @@ -443,7 +443,7 @@ func explainTemporaryCheckpoint(ctx context.Context, w io.Writer, repo *git.Repo var fullTranscript []byte var scopedTranscript []byte if full || verbose { - fullTranscript, _ = store.GetTranscriptFromCommit(ctx, tc.CommitHash, tc.MetadataDir, agentType) //nolint:errcheck // Best-effort + fullTranscript, _ = store.GetTranscriptFromCommitForDisplay(ctx, tc.CommitHash, tc.MetadataDir, agentType) //nolint:errcheck // Best-effort if verbose && len(fullTranscript) > 0 { // Compute scoped transcript by finding where parent's transcript ended @@ -452,7 +452,7 @@ func explainTemporaryCheckpoint(ctx context.Context, w io.Writer, repo *git.Repo scopedTranscript = fullTranscript // Default to full if no parent if shadowCommit.NumParents() > 0 { if parent, parentErr := shadowCommit.Parent(0); parentErr == nil { - parentTranscript, _ := store.GetTranscriptFromCommit(ctx, parent.Hash, tc.MetadataDir, agentType) //nolint:errcheck // Best-effort + parentTranscript, _ := store.GetTranscriptFromCommitForDisplay(ctx, parent.Hash, tc.MetadataDir, agentType) //nolint:errcheck // Best-effort if len(parentTranscript) > 0 { parentOffset := transcriptOffset(parentTranscript, agentType) scopedTranscript = scopeTranscriptForCheckpoint(fullTranscript, parentOffset, agentType) diff --git a/cmd/entire/cli/filter/filter.go b/cmd/entire/cli/filter/filter.go index 1b7009f3b..e1fd0ff8d 100644 --- a/cmd/entire/cli/filter/filter.go +++ b/cmd/entire/cli/filter/filter.go @@ -105,17 +105,11 @@ func (p *Pipeline) Smudge(data []byte) []byte { // CleanString applies Clean to a string value. // Safe to call on a nil *Pipeline (returns s unchanged). func (p *Pipeline) CleanString(s string) string { - if p == nil { - return s - } return string(p.Clean([]byte(s))) } // SmudgeString applies Smudge to a string value. // Safe to call on a nil *Pipeline (returns s unchanged). func (p *Pipeline) SmudgeString(s string) string { - if p == nil { - return s - } return string(p.Smudge([]byte(s))) } diff --git a/cmd/entire/cli/resume.go b/cmd/entire/cli/resume.go index 831571c89..b055ce38f 100644 --- a/cmd/entire/cli/resume.go +++ b/cmd/entire/cli/resume.go @@ -745,7 +745,7 @@ func resumeSingleSession(ctx context.Context, w, errW io.Writer, ag agent.Agent, return nil } - logContent, _, err := checkpoint.LookupSessionLog(ctx, checkpointID) + logContent, _, err := checkpoint.LookupSessionLogForDisplay(ctx, checkpointID) if err != nil { if errors.Is(err, checkpoint.ErrCheckpointNotFound) || errors.Is(err, checkpoint.ErrNoTranscript) { logging.Debug(ctx, "resume session completed (no metadata)", diff --git a/cmd/entire/cli/rewind.go b/cmd/entire/cli/rewind.go index d8c3659d2..b3ce3efc5 100644 --- a/cmd/entire/cli/rewind.go +++ b/cmd/entire/cli/rewind.go @@ -678,7 +678,7 @@ func restoreSessionTranscript(ctx context.Context, w io.Writer, transcriptFile, // Returns the session ID that was actually used (may differ from input if checkpoint provides one). func restoreSessionTranscriptFromStrategy(ctx context.Context, cpID id.CheckpointID, sessionID string, agent agentpkg.Agent) (string, error) { // Get transcript content from checkpoint storage - content, returnedSessionID, err := checkpoint.LookupSessionLog(ctx, cpID) + content, returnedSessionID, err := checkpoint.LookupSessionLogForDisplay(ctx, cpID) if err != nil { return "", fmt.Errorf("failed to get session log: %w", err) } @@ -725,7 +725,7 @@ func restoreSessionTranscriptFromShadow(ctx context.Context, commitHash, metadat // Get transcript from shadow branch commit tree store := checkpoint.NewGitStore(repo) - content, err := store.GetTranscriptFromCommit(ctx, hash, metadataDir, agent.Type()) + content, err := store.GetTranscriptFromCommitForDisplay(ctx, hash, metadataDir, agent.Type()) if err != nil { return "", fmt.Errorf("failed to get transcript from shadow branch: %w", err) } diff --git a/cmd/entire/cli/strategy/manual_commit_rewind.go b/cmd/entire/cli/strategy/manual_commit_rewind.go index 8e5811bd5..e11ea07b8 100644 --- a/cmd/entire/cli/strategy/manual_commit_rewind.go +++ b/cmd/entire/cli/strategy/manual_commit_rewind.go @@ -211,7 +211,7 @@ func (s *ManualCommitStrategy) GetLogsOnlyRewindPoints(ctx context.Context, limi var sessionPrompt string var sessionPrompts []string if metadataTree != nil { - checkpointPath := paths.CheckpointPath(cpInfo.CheckpointID) + checkpointPath := paths.CheckpointPath(cpInfo.CheckpointID) //nolint:staticcheck // already present in codebase // For multi-session checkpoints, read all prompts if cpInfo.SessionCount > 1 && len(cpInfo.SessionIDs) > 1 { sessionPrompts = ReadAllSessionPromptsFromTree(metadataTree, checkpointPath, cpInfo.SessionCount, cpInfo.SessionIDs) @@ -671,7 +671,7 @@ func (s *ManualCommitStrategy) RestoreLogsOnly(ctx context.Context, w, errW io.W fmt.Fprintf(w, "Restoring %d sessions from checkpoint:\n", totalSessions) } - // Construct filter pipeline once for all sessions (avoids re-reading settings per session) + // Construct filter pipeline once for all sessions pipeline := filter.FromContext(ctx) // Restore all sessions (oldest to newest, using 0-based indexing) @@ -685,6 +685,7 @@ func (s *ManualCommitStrategy) RestoreLogsOnly(ctx context.Context, w, errW io.W if content == nil || len(content.Transcript) == 0 { continue } + cpkg.SmudgeSessionContent(content, pipeline) sessionID := content.Metadata.SessionID if sessionID == "" { @@ -739,7 +740,7 @@ func (s *ManualCommitStrategy) RestoreLogsOnly(ctx context.Context, w, errW io.W AgentName: sessionAgent.Name(), RepoPath: repoRoot, SessionRef: sessionFile, - NativeData: pipeline.Smudge(content.Transcript), + NativeData: content.Transcript, } if writeErr := sessionAgent.WriteSession(ctx, agentSession); writeErr != nil { if totalSessions > 1 { @@ -824,6 +825,7 @@ type SessionRestoreInfo struct { func (s *ManualCommitStrategy) classifySessionsForRestore(ctx context.Context, repoRoot string, store cpkg.Store, checkpointID id.CheckpointID, summary *cpkg.CheckpointSummary) []SessionRestoreInfo { var sessions []SessionRestoreInfo + pipeline := filter.FromContext(ctx) totalSessions := len(summary.Sessions) // Check all sessions (0-based indexing) for i := range totalSessions { @@ -831,6 +833,7 @@ func (s *ManualCommitStrategy) classifySessionsForRestore(ctx context.Context, r if err != nil || content == nil || len(content.Transcript) == 0 { continue } + cpkg.SmudgeSessionContent(content, pipeline) sessionID := content.Metadata.SessionID if sessionID == "" || content.Metadata.Agent == "" {