Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/entire/cli/checkpoint/checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ type Store interface {

// ReadSessionContent reads the actual content for a specific session within a checkpoint.
// sessionIndex is 0-based (0 for first session, 1 for second, etc.).
// Returns the session's metadata, transcript, and prompts.
// Returns the session's metadata, transcript, and prompts in raw (stored) form.
ReadSessionContent(ctx context.Context, checkpointID id.CheckpointID, sessionIndex int) (*SessionContent, error)

// ReadSessionContentByID reads a session's content by its session ID.
Expand Down
40 changes: 30 additions & 10 deletions cmd/entire/cli/checkpoint/committed.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/entireio/cli/cmd/entire/cli/agent"
"github.com/entireio/cli/cmd/entire/cli/agent/types"
"github.com/entireio/cli/cmd/entire/cli/checkpoint/id"
"github.com/entireio/cli/cmd/entire/cli/filter"
"github.com/entireio/cli/cmd/entire/cli/jsonutil"
"github.com/entireio/cli/cmd/entire/cli/logging"
"github.com/entireio/cli/cmd/entire/cli/paths"
Expand Down Expand Up @@ -333,6 +334,8 @@ func (s *GitStore) writeStandardCheckpointEntries(ctx context.Context, opts Writ
func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCommittedOptions, sessionPath string, entries map[string]object.TreeEntry) (SessionFilePaths, error) {
filePaths := SessionFilePaths{}

pipeline := filter.FromContext(ctx)

// Clear any existing entries at this path so stale files from a previous
// write (e.g. prompt.txt) don't persist on overwrite.
for key := range entries {
Expand All @@ -342,15 +345,15 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom
}

// Write transcript
if err := s.writeTranscript(ctx, opts, sessionPath, entries); err != nil {
if err := s.writeTranscript(ctx, opts, sessionPath, entries, pipeline); err != nil {
return filePaths, err
}
filePaths.Transcript = "/" + sessionPath + paths.TranscriptFileName
filePaths.ContentHash = "/" + sessionPath + paths.ContentHashFileName

// Write prompts
if len(opts.Prompts) > 0 {
promptContent := redact.String(strings.Join(opts.Prompts, "\n\n---\n\n"))
promptContent := cleanAndRedactPrompts(pipeline, opts.Prompts)
blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent))
if err != nil {
return filePaths, err
Expand Down Expand Up @@ -542,9 +545,15 @@ func aggregateTokenUsage(a, b *agent.TokenUsage) *agent.TokenUsage {
return result
}

// cleanAndRedactPrompts prepares prompts for storage: it joins them with the
// standard "---" separator, runs the clean filter pipeline over the result,
// and finally redacts any secrets.
func cleanAndRedactPrompts(pipeline *filter.Pipeline, prompts []string) string {
	joined := strings.Join(prompts, "\n\n---\n\n")
	cleaned := pipeline.CleanString(joined)
	return redact.String(cleaned)
}

// writeTranscript writes the transcript file from in-memory content or file path.
// If the transcript exceeds MaxChunkSize, it's split into multiple chunk files.
func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptions, basePath string, entries map[string]object.TreeEntry) error {
// pipeline may be nil (no-op filtering).
func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptions, basePath string, entries map[string]object.TreeEntry, pipeline *filter.Pipeline) error {
transcript := opts.Transcript
if len(transcript) == 0 && opts.TranscriptPath != "" {
var readErr error
Expand All @@ -558,7 +567,9 @@ func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptio
return nil
}

// Redact secrets before chunking so content hash reflects redacted content
// Normalize machine-specific paths first (before redaction can alter them),
// then redact secrets.
transcript = pipeline.Clean(transcript)
transcript, err := redact.JSONLBytes(transcript)
if err != nil {
return fmt.Errorf("failed to redact transcript secrets: %w", err)
Expand Down Expand Up @@ -1115,6 +1126,8 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti
return errors.New("invalid update options: checkpoint ID is required")
}

pipeline := filter.FromContext(ctx)

// Ensure sessions branch exists
if err := s.ensureSessionsBranch(); err != nil {
return fmt.Errorf("failed to ensure sessions branch: %w", err)
Expand Down Expand Up @@ -1176,7 +1189,8 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti
// Replace transcript (full replace, not append)
// Apply redaction as safety net (caller should redact, but we ensure it here)
if len(opts.Transcript) > 0 {
transcript, err := redact.JSONLBytes(opts.Transcript)
transcript := pipeline.Clean(opts.Transcript)
transcript, err := redact.JSONLBytes(transcript)
if err != nil {
return fmt.Errorf("failed to redact transcript secrets: %w", err)
}
Expand All @@ -1187,7 +1201,7 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti

// Replace prompts (apply redaction as safety net)
if len(opts.Prompts) > 0 {
promptContent := redact.String(strings.Join(opts.Prompts, "\n\n---\n\n"))
promptContent := cleanAndRedactPrompts(pipeline, opts.Prompts)
blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent))
if err != nil {
return fmt.Errorf("failed to create prompt blob: %w", err)
Expand Down Expand Up @@ -1402,8 +1416,9 @@ func (s *GitStore) copyMetadataDir(metadataDir, basePath string, entries map[str
return fmt.Errorf("path traversal detected: %s", relPath)
}

// Create blob from file with secrets redaction
blobHash, mode, err := createRedactedBlobFromFile(s.repo, path, relPath)
// Create blob from file with secrets redaction (no transcript filter —
// copyMetadataDir is used for task checkpoint metadata, not transcripts)
blobHash, mode, err := createRedactedBlobFromFile(s.repo, path, relPath, nil)
Comment on lines +1419 to +1421
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

copyMetadataDir is documented as being used for “task checkpoints, subagent transcripts, etc.”, but it passes a nil filter pipeline into createRedactedBlobFromFile. This means any absolute paths inside those copied metadata files (e.g., task prompts, agent-*.jsonl) will bypass the new clean filter and be committed un-normalized, undermining the path/privacy filtering behavior elsewhere in the PR. Consider constructing a pipeline once (like in writeSessionToSubdirectory) and passing it through here, or selectively enabling filtering for transcript/prompt-like metadata files while keeping it disabled for files that must remain byte-identical.

Suggested change
// Create blob from file with secrets redaction (no transcript filter —
// copyMetadataDir is used for task checkpoint metadata, not transcripts)
blobHash, mode, err := createRedactedBlobFromFile(s.repo, path, relPath, nil)
// Create blob from file with secrets/path redaction. We intentionally avoid
// the full transcript filter here (copyMetadataDir is used for task
// checkpoint metadata, not transcripts), but still apply the standard
// clean filter pipeline so absolute paths and other sensitive data are
// normalized consistently with the rest of the checkpoint code.
cleanPipeline := filter.NewCleanPipeline()
blobHash, mode, err := createRedactedBlobFromFile(s.repo, path, relPath, cleanPipeline)

Copilot uses AI. Check for mistakes.
if err != nil {
return fmt.Errorf("failed to create blob for %s: %w", path, err)
}
Expand All @@ -1424,9 +1439,11 @@ func (s *GitStore) copyMetadataDir(metadataDir, basePath string, entries map[str
return nil
}

// createRedactedBlobFromFile reads a file, applies secrets redaction, and creates a git blob.
// createRedactedBlobFromFile reads a file, applies secrets redaction and optional
// transcript filters, and creates a git blob.
// pipeline may be nil (no-op filtering).
// JSONL files get JSONL-aware redaction; all other files get plain string redaction.
func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string) (plumbing.Hash, filemode.FileMode, error) {
func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string, pipeline *filter.Pipeline) (plumbing.Hash, filemode.FileMode, error) {
info, err := os.Stat(filePath)
if err != nil {
return plumbing.ZeroHash, 0, fmt.Errorf("failed to stat file: %w", err)
Expand All @@ -1453,6 +1470,9 @@ func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string)
return hash, mode, nil
}

// Normalize paths before redaction so redaction doesn't alter matchable paths.
content = pipeline.Clean(content)

if strings.HasSuffix(treePath, ".jsonl") {
redacted, jsonlErr := redact.JSONLBytes(content)
if jsonlErr != nil {
Expand Down
74 changes: 74 additions & 0 deletions cmd/entire/cli/checkpoint/display.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package checkpoint

import (
"context"

"github.com/entireio/cli/cmd/entire/cli/agent/types"
"github.com/entireio/cli/cmd/entire/cli/checkpoint/id"
"github.com/entireio/cli/cmd/entire/cli/filter"

"github.com/go-git/go-git/v6/plumbing"
)

// SmudgeSessionContent applies the smudge filter in place to the transcript
// and prompts of a SessionContent, turning stored placeholders back into
// machine-specific paths. Calling it with a nil content or nil pipeline is a
// safe no-op.
func SmudgeSessionContent(content *SessionContent, pipeline *filter.Pipeline) {
	if content != nil && pipeline != nil {
		content.Transcript = pipeline.Smudge(content.Transcript)
		content.Prompts = pipeline.SmudgeString(content.Prompts)
	}
}

// ReadSessionContentForDisplay reads a session's content and runs the smudge
// filter over it so stored placeholders are expanded back into
// machine-specific paths. Intended for user-facing output; internal callers
// should use ReadSessionContent, which returns the raw stored form.
func (s *GitStore) ReadSessionContentForDisplay(ctx context.Context, checkpointID id.CheckpointID, sessionIndex int) (*SessionContent, error) {
	raw, err := s.ReadSessionContent(ctx, checkpointID, sessionIndex)
	if err != nil {
		return nil, err
	}
	pipeline := filter.FromContext(ctx)
	SmudgeSessionContent(raw, pipeline)
	return raw, nil
}

// ReadLatestSessionContentForDisplay is the display variant of
// ReadLatestSessionContent: the returned content has the smudge filter
// applied, making it suitable for user-facing output.
func (s *GitStore) ReadLatestSessionContentForDisplay(ctx context.Context, checkpointID id.CheckpointID) (*SessionContent, error) {
	raw, err := s.ReadLatestSessionContent(ctx, checkpointID)
	if err != nil {
		return nil, err
	}
	pipeline := filter.FromContext(ctx)
	SmudgeSessionContent(raw, pipeline)
	return raw, nil
}

// GetSessionLogForDisplay is the display variant of GetSessionLog: it returns
// the smudged transcript along with the session ID for the given checkpoint.
func (s *GitStore) GetSessionLogForDisplay(ctx context.Context, cpID id.CheckpointID) ([]byte, string, error) {
	transcript, sessionID, err := s.GetSessionLog(ctx, cpID)
	if err != nil {
		return nil, "", err
	}
	smudged := filter.FromContext(ctx).Smudge(transcript)
	return smudged, sessionID, nil
}

// LookupSessionLogForDisplay opens the repository and retrieves a session log
// by checkpoint ID, applying the smudge filter before returning the
// transcript and session ID.
func LookupSessionLogForDisplay(ctx context.Context, cpID id.CheckpointID) ([]byte, string, error) {
	transcript, sessionID, err := LookupSessionLog(ctx, cpID)
	if err != nil {
		return nil, "", err
	}
	smudged := filter.FromContext(ctx).Smudge(transcript)
	return smudged, sessionID, nil
}

// GetTranscriptFromCommitForDisplay is the display variant of
// GetTranscriptFromCommit: the returned transcript has the smudge filter
// applied so stored placeholders are expanded for user-facing output.
func (s *GitStore) GetTranscriptFromCommitForDisplay(ctx context.Context, commitHash plumbing.Hash, metadataDir string, agentType types.AgentType) ([]byte, error) {
	raw, err := s.GetTranscriptFromCommit(ctx, commitHash, metadataDir, agentType)
	if err != nil {
		return nil, err
	}
	return filter.FromContext(ctx).Smudge(raw), nil
}
24 changes: 16 additions & 8 deletions cmd/entire/cli/checkpoint/temporary.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (

"github.com/entireio/cli/cmd/entire/cli/agent"
"github.com/entireio/cli/cmd/entire/cli/agent/types"
"github.com/entireio/cli/cmd/entire/cli/filter"
"github.com/entireio/cli/cmd/entire/cli/jsonutil"
"github.com/entireio/cli/cmd/entire/cli/logging"
"github.com/entireio/cli/cmd/entire/cli/paths"
Expand Down Expand Up @@ -106,8 +107,10 @@ func (s *GitStore) WriteTemporary(ctx context.Context, opts WriteTemporaryOption
allDeletedFiles = opts.DeletedFiles
}

pipeline := filter.FromContext(ctx)

// Build tree with changes
treeHash, err := s.buildTreeWithChanges(ctx, baseTreeHash, allFiles, allDeletedFiles, opts.MetadataDir, opts.MetadataDirAbs)
treeHash, err := s.buildTreeWithChanges(ctx, baseTreeHash, allFiles, allDeletedFiles, opts.MetadataDir, opts.MetadataDirAbs, pipeline)
if err != nil {
return WriteTemporaryResult{}, fmt.Errorf("failed to build tree: %w", err)
}
Expand Down Expand Up @@ -264,8 +267,8 @@ func (s *GitStore) WriteTemporaryTask(ctx context.Context, opts WriteTemporaryTa
allFiles = append(allFiles, opts.ModifiedFiles...)
allFiles = append(allFiles, opts.NewFiles...)

// Build new tree with code changes (no metadata dir yet)
newTreeHash, err := s.buildTreeWithChanges(ctx, baseTreeHash, allFiles, opts.DeletedFiles, "", "")
// Build new tree with code changes (no metadata dir yet; nil pipeline — task checkpoints have no metadata dir)
newTreeHash, err := s.buildTreeWithChanges(ctx, baseTreeHash, allFiles, opts.DeletedFiles, "", "", nil)
if err != nil {
return plumbing.ZeroHash, fmt.Errorf("failed to build tree: %w", err)
}
Expand Down Expand Up @@ -302,14 +305,17 @@ func (s *GitStore) addTaskMetadataToTree(ctx context.Context, baseTreeHash plumb
sessionMetadataDir := paths.EntireMetadataDir + "/" + opts.SessionID
taskMetadataDir := sessionMetadataDir + "/tasks/" + opts.ToolUseID

pipeline := filter.FromContext(ctx)

var changes []TreeChange

if opts.IsIncremental {
// Incremental checkpoint: only add the checkpoint file
var incData []byte
var err error
if opts.IncrementalData != nil {
incData, err = redact.JSONLBytes(opts.IncrementalData)
incData = pipeline.Clean(opts.IncrementalData)
incData, err = redact.JSONLBytes(incData)
if err != nil {
return plumbing.ZeroHash, fmt.Errorf("failed to redact incremental checkpoint: %w", err)
}
Expand Down Expand Up @@ -379,6 +385,7 @@ func (s *GitStore) addTaskMetadataToTree(ctx context.Context, baseTreeHash plumb
// Add subagent transcript if available
if opts.SubagentTranscriptPath != "" && opts.AgentID != "" {
if agentContent, readErr := os.ReadFile(opts.SubagentTranscriptPath); readErr == nil {
agentContent = pipeline.Clean(agentContent)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Task session transcript missing path normalization filtering

Medium Severity

In addTaskMetadataToTree, the PR adds pipeline.Clean() to the incremental data (line 317) and the subagent transcript (line 388), but the main session transcript read from opts.TranscriptPath at line 354 is stored without pipeline.Clean() being applied. This means absolute machine-specific paths in the session transcript won't be normalized to placeholders, breaking cross-machine portability for task checkpoint transcripts while the subagent transcript in the same function is correctly normalized.

Additional Locations (1)
Fix in Cursor Fix in Web

redacted, jsonlErr := redact.JSONLBytes(agentContent)
if jsonlErr != nil {
logging.Warn(ctx, "subagent transcript is not valid JSONL, falling back to plain redaction",
Expand Down Expand Up @@ -710,6 +717,7 @@ func (s *GitStore) buildTreeWithChanges(
baseTreeHash plumbing.Hash,
modifiedFiles, deletedFiles []string,
metadataDir, metadataDirAbs string,
pipeline *filter.Pipeline,
) (plumbing.Hash, error) {
// Get worktree root for resolving file paths
// This is critical because fileExists() and createBlobFromFile() use os.Stat()
Expand Down Expand Up @@ -754,7 +762,7 @@ func (s *GitStore) buildTreeWithChanges(

// Metadata directory files
if metadataDir != "" && metadataDirAbs != "" {
metaChanges, metaErr := addDirectoryToChanges(s.repo, metadataDirAbs, metadataDir)
metaChanges, metaErr := addDirectoryToChanges(s.repo, metadataDirAbs, metadataDir, pipeline)
if metaErr != nil {
return plumbing.ZeroHash, fmt.Errorf("failed to add metadata directory: %w", metaErr)
}
Expand Down Expand Up @@ -899,7 +907,7 @@ func addDirectoryToEntriesWithAbsPath(repo *git.Repository, dirPathAbs, dirPathR

// Use redacted blob creation for metadata files (transcripts, prompts, etc.)
// to ensure PII and secrets are redacted before writing to git.
blobHash, mode, err := createRedactedBlobFromFile(repo, path, treePath)
blobHash, mode, err := createRedactedBlobFromFile(repo, path, treePath, nil)
if err != nil {
return fmt.Errorf("failed to create blob for %s: %w", path, err)
}
Expand All @@ -925,7 +933,7 @@ type treeNode struct {
// addDirectoryToChanges walks a filesystem directory and returns TreeChange entries
// for each file, suitable for use with ApplyTreeChanges.
// dirPathAbs is the absolute filesystem path; dirPathRel is the git tree-relative path.
func addDirectoryToChanges(repo *git.Repository, dirPathAbs, dirPathRel string) ([]TreeChange, error) {
func addDirectoryToChanges(repo *git.Repository, dirPathAbs, dirPathRel string, pipeline *filter.Pipeline) ([]TreeChange, error) {
var changes []TreeChange
err := filepath.Walk(dirPathAbs, func(path string, info os.FileInfo, err error) error {
if err != nil {
Expand Down Expand Up @@ -958,7 +966,7 @@ func addDirectoryToChanges(repo *git.Repository, dirPathAbs, dirPathRel string)

treePath := filepath.ToSlash(filepath.Join(dirPathRel, relWithinDir))

blobHash, mode, blobErr := createRedactedBlobFromFile(repo, path, treePath)
blobHash, mode, blobErr := createRedactedBlobFromFile(repo, path, treePath, pipeline)
if blobErr != nil {
return fmt.Errorf("failed to create blob for %s: %w", path, blobErr)
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/entire/cli/checkpoint/tree_surgery_equiv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func TestBuildTreeWithChanges_EquivalenceWithFlattenRebuild(t *testing.T) { //no
t.Chdir(dir)

// --- New approach: ApplyTreeChanges (what buildTreeWithChanges now does) ---
newHash, err := store.buildTreeWithChanges(context.Background(), baseTreeHash, modifiedFiles, deletedFiles, metadataDir, metadataDirAbs)
newHash, err := store.buildTreeWithChanges(context.Background(), baseTreeHash, modifiedFiles, deletedFiles, metadataDir, metadataDirAbs, nil)
if err != nil {
t.Fatalf("buildTreeWithChanges (new): %v", err)
}
Expand Down
10 changes: 5 additions & 5 deletions cmd/entire/cli/explain.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ func runExplainCheckpoint(ctx context.Context, w, errW io.Writer, checkpointIDPr
}

// Load latest session content (needed for transcript and metadata)
content, err := store.ReadLatestSessionContent(ctx, fullCheckpointID)
content, err := store.ReadLatestSessionContentForDisplay(ctx, fullCheckpointID)
if err != nil {
return fmt.Errorf("failed to read checkpoint content: %w", err)
}
Expand All @@ -269,7 +269,7 @@ func runExplainCheckpoint(ctx context.Context, w, errW io.Writer, checkpointIDPr
return err
}
// Reload the content to get the updated summary
content, err = store.ReadLatestSessionContent(ctx, fullCheckpointID)
content, err = store.ReadLatestSessionContentForDisplay(ctx, fullCheckpointID)
if err != nil {
return fmt.Errorf("failed to reload checkpoint: %w", err)
}
Expand Down Expand Up @@ -404,7 +404,7 @@ func explainTemporaryCheckpoint(ctx context.Context, w io.Writer, repo *git.Repo

// Handle raw transcript output
if rawTranscript {
transcriptBytes, transcriptErr := store.GetTranscriptFromCommit(ctx, tc.CommitHash, tc.MetadataDir, agentType)
transcriptBytes, transcriptErr := store.GetTranscriptFromCommitForDisplay(ctx, tc.CommitHash, tc.MetadataDir, agentType)
if transcriptErr != nil || len(transcriptBytes) == 0 {
// Return specific error message (consistent with committed checkpoints)
return fmt.Sprintf("checkpoint %s has no transcript", tc.CommitHash.String()[:7]), false
Expand Down Expand Up @@ -443,7 +443,7 @@ func explainTemporaryCheckpoint(ctx context.Context, w io.Writer, repo *git.Repo
var fullTranscript []byte
var scopedTranscript []byte
if full || verbose {
fullTranscript, _ = store.GetTranscriptFromCommit(ctx, tc.CommitHash, tc.MetadataDir, agentType) //nolint:errcheck // Best-effort
fullTranscript, _ = store.GetTranscriptFromCommitForDisplay(ctx, tc.CommitHash, tc.MetadataDir, agentType) //nolint:errcheck // Best-effort

if verbose && len(fullTranscript) > 0 {
// Compute scoped transcript by finding where parent's transcript ended
Expand All @@ -452,7 +452,7 @@ func explainTemporaryCheckpoint(ctx context.Context, w io.Writer, repo *git.Repo
scopedTranscript = fullTranscript // Default to full if no parent
if shadowCommit.NumParents() > 0 {
if parent, parentErr := shadowCommit.Parent(0); parentErr == nil {
parentTranscript, _ := store.GetTranscriptFromCommit(ctx, parent.Hash, tc.MetadataDir, agentType) //nolint:errcheck // Best-effort
parentTranscript, _ := store.GetTranscriptFromCommitForDisplay(ctx, parent.Hash, tc.MetadataDir, agentType) //nolint:errcheck // Best-effort
if len(parentTranscript) > 0 {
parentOffset := transcriptOffset(parentTranscript, agentType)
scopedTranscript = scopeTranscriptForCheckpoint(fullTranscript, parentOffset, agentType)
Expand Down
Loading
Loading