-
Notifications
You must be signed in to change notification settings - Fork 266
Handle path filtering and general other filtering #758
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
5ca7c36
a820aeb
78effa7
f867e90
d171ba5
5f35a16
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,74 @@ | ||
| package checkpoint | ||
|
|
||
| import ( | ||
| "context" | ||
|
|
||
| "github.com/entireio/cli/cmd/entire/cli/agent/types" | ||
| "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" | ||
| "github.com/entireio/cli/cmd/entire/cli/filter" | ||
|
|
||
| "github.com/go-git/go-git/v6/plumbing" | ||
| ) | ||
|
|
||
| // SmudgeSessionContent runs the pipeline's smudge filter over the Transcript | ||
| // and Prompts fields of content, overwriting both fields in place. | ||
| // Transcript goes through pipeline.Smudge and Prompts through | ||
| // pipeline.SmudgeString. It is a no-op when content or pipeline is nil. | ||
| func SmudgeSessionContent(content *SessionContent, pipeline *filter.Pipeline) { | ||
| // Nothing to rewrite, or no pipeline to rewrite it with. | ||
| if content == nil || pipeline == nil { | ||
| return | ||
| } | ||
| content.Transcript = pipeline.Smudge(content.Transcript) | ||
| content.Prompts = pipeline.SmudgeString(content.Prompts) | ||
| } | ||
|
|
||
| // ReadSessionContentForDisplay reads the session at sessionIndex of the given | ||
| // checkpoint via ReadSessionContent and then applies the smudge filter, so | ||
| // stored placeholders are replaced with machine-specific paths. | ||
| // The pipeline is taken from ctx with filter.FromContext. An error from | ||
| // ReadSessionContent is returned unchanged, together with a nil content. | ||
| // Use this for user-facing output; use ReadSessionContent for internal operations. | ||
| func (s *GitStore) ReadSessionContentForDisplay(ctx context.Context, checkpointID id.CheckpointID, sessionIndex int) (*SessionContent, error) { | ||
| content, err := s.ReadSessionContent(ctx, checkpointID, sessionIndex) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
| // SmudgeSessionContent tolerates a nil pipeline, so no check is needed here. | ||
| SmudgeSessionContent(content, filter.FromContext(ctx)) | ||
| return content, nil | ||
| } | ||
|
|
||
| // ReadLatestSessionContentForDisplay is the display variant of | ||
| // ReadLatestSessionContent: it reads the content for checkpointID through | ||
| // ReadLatestSessionContent and smudges it in place with the pipeline taken | ||
| // from ctx via filter.FromContext. An error from ReadLatestSessionContent is | ||
| // returned unchanged, together with a nil content. | ||
| func (s *GitStore) ReadLatestSessionContentForDisplay(ctx context.Context, checkpointID id.CheckpointID) (*SessionContent, error) { | ||
| content, err := s.ReadLatestSessionContent(ctx, checkpointID) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
| // SmudgeSessionContent tolerates a nil pipeline, so no check is needed here. | ||
| SmudgeSessionContent(content, filter.FromContext(ctx)) | ||
| return content, nil | ||
| } | ||
|
|
||
| // GetSessionLogForDisplay is the display variant of GetSessionLog: it fetches | ||
| // the transcript and session ID for cpID, then returns the transcript after | ||
| // passing it through the smudge filter taken from ctx, along with the | ||
| // session ID as returned by GetSessionLog. | ||
| // On failure it returns a nil transcript, an empty session ID and the | ||
| // unmodified error from GetSessionLog. | ||
| func (s *GitStore) GetSessionLogForDisplay(ctx context.Context, cpID id.CheckpointID) ([]byte, string, error) { | ||
| transcript, sessionID, err := s.GetSessionLog(ctx, cpID) | ||
| if err != nil { | ||
| return nil, "", err | ||
| } | ||
| // NOTE(review): pipeline is not nil-checked here, unlike in | ||
| // SmudgeSessionContent; presumably Smudge tolerates a nil *filter.Pipeline — confirm. | ||
| pipeline := filter.FromContext(ctx) | ||
| return pipeline.Smudge(transcript), sessionID, nil | ||
| } | ||
|
|
||
| // LookupSessionLogForDisplay is a convenience function that opens the repository | ||
| // and retrieves a smudged session log by checkpoint ID. | ||
| // It delegates the lookup to LookupSessionLog and then runs the returned | ||
| // transcript through the smudge filter taken from ctx; the session ID is | ||
| // passed through as-is. On failure it returns a nil transcript, an empty | ||
| // session ID and the unmodified error from LookupSessionLog. | ||
| func LookupSessionLogForDisplay(ctx context.Context, cpID id.CheckpointID) ([]byte, string, error) { | ||
| transcript, sessionID, err := LookupSessionLog(ctx, cpID) | ||
| if err != nil { | ||
| return nil, "", err | ||
| } | ||
| // NOTE(review): pipeline is not nil-checked here, unlike in | ||
| // SmudgeSessionContent; presumably Smudge tolerates a nil *filter.Pipeline — confirm. | ||
| pipeline := filter.FromContext(ctx) | ||
| return pipeline.Smudge(transcript), sessionID, nil | ||
| } | ||
|
|
||
| // GetTranscriptFromCommitForDisplay is the display variant of | ||
| // GetTranscriptFromCommit: it forwards commitHash, metadataDir and agentType | ||
| // to GetTranscriptFromCommit and returns the resulting transcript after | ||
| // passing it through the smudge filter taken from ctx. | ||
| // On failure it returns nil and the unmodified error from GetTranscriptFromCommit. | ||
| func (s *GitStore) GetTranscriptFromCommitForDisplay(ctx context.Context, commitHash plumbing.Hash, metadataDir string, agentType types.AgentType) ([]byte, error) { | ||
| transcript, err := s.GetTranscriptFromCommit(ctx, commitHash, metadataDir, agentType) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
| // NOTE(review): pipeline is not nil-checked here, unlike in | ||
| // SmudgeSessionContent; presumably Smudge tolerates a nil *filter.Pipeline — confirm. | ||
| pipeline := filter.FromContext(ctx) | ||
| return pipeline.Smudge(transcript), nil | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,7 @@ import ( | |
|
|
||
| "github.com/entireio/cli/cmd/entire/cli/agent" | ||
| "github.com/entireio/cli/cmd/entire/cli/agent/types" | ||
| "github.com/entireio/cli/cmd/entire/cli/filter" | ||
| "github.com/entireio/cli/cmd/entire/cli/jsonutil" | ||
| "github.com/entireio/cli/cmd/entire/cli/logging" | ||
| "github.com/entireio/cli/cmd/entire/cli/paths" | ||
|
|
@@ -106,8 +107,10 @@ func (s *GitStore) WriteTemporary(ctx context.Context, opts WriteTemporaryOption | |
| allDeletedFiles = opts.DeletedFiles | ||
| } | ||
|
|
||
| pipeline := filter.FromContext(ctx) | ||
|
|
||
| // Build tree with changes | ||
| treeHash, err := s.buildTreeWithChanges(ctx, baseTreeHash, allFiles, allDeletedFiles, opts.MetadataDir, opts.MetadataDirAbs) | ||
| treeHash, err := s.buildTreeWithChanges(ctx, baseTreeHash, allFiles, allDeletedFiles, opts.MetadataDir, opts.MetadataDirAbs, pipeline) | ||
| if err != nil { | ||
| return WriteTemporaryResult{}, fmt.Errorf("failed to build tree: %w", err) | ||
| } | ||
|
|
@@ -264,8 +267,8 @@ func (s *GitStore) WriteTemporaryTask(ctx context.Context, opts WriteTemporaryTa | |
| allFiles = append(allFiles, opts.ModifiedFiles...) | ||
| allFiles = append(allFiles, opts.NewFiles...) | ||
|
|
||
| // Build new tree with code changes (no metadata dir yet) | ||
| newTreeHash, err := s.buildTreeWithChanges(ctx, baseTreeHash, allFiles, opts.DeletedFiles, "", "") | ||
| // Build new tree with code changes (no metadata dir yet; nil pipeline — task checkpoints have no metadata dir) | ||
| newTreeHash, err := s.buildTreeWithChanges(ctx, baseTreeHash, allFiles, opts.DeletedFiles, "", "", nil) | ||
| if err != nil { | ||
| return plumbing.ZeroHash, fmt.Errorf("failed to build tree: %w", err) | ||
| } | ||
|
|
@@ -302,14 +305,17 @@ func (s *GitStore) addTaskMetadataToTree(ctx context.Context, baseTreeHash plumb | |
| sessionMetadataDir := paths.EntireMetadataDir + "/" + opts.SessionID | ||
| taskMetadataDir := sessionMetadataDir + "/tasks/" + opts.ToolUseID | ||
|
|
||
| pipeline := filter.FromContext(ctx) | ||
|
|
||
| var changes []TreeChange | ||
|
|
||
| if opts.IsIncremental { | ||
| // Incremental checkpoint: only add the checkpoint file | ||
| var incData []byte | ||
| var err error | ||
| if opts.IncrementalData != nil { | ||
| incData, err = redact.JSONLBytes(opts.IncrementalData) | ||
| incData = pipeline.Clean(opts.IncrementalData) | ||
| incData, err = redact.JSONLBytes(incData) | ||
| if err != nil { | ||
| return plumbing.ZeroHash, fmt.Errorf("failed to redact incremental checkpoint: %w", err) | ||
| } | ||
|
|
@@ -379,6 +385,7 @@ func (s *GitStore) addTaskMetadataToTree(ctx context.Context, baseTreeHash plumb | |
| // Add subagent transcript if available | ||
| if opts.SubagentTranscriptPath != "" && opts.AgentID != "" { | ||
| if agentContent, readErr := os.ReadFile(opts.SubagentTranscriptPath); readErr == nil { | ||
| agentContent = pipeline.Clean(agentContent) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Task session transcript missing path normalization filtering. Medium Severity. In Additional Locations (1) |
||
| redacted, jsonlErr := redact.JSONLBytes(agentContent) | ||
| if jsonlErr != nil { | ||
| logging.Warn(ctx, "subagent transcript is not valid JSONL, falling back to plain redaction", | ||
|
|
@@ -710,6 +717,7 @@ func (s *GitStore) buildTreeWithChanges( | |
| baseTreeHash plumbing.Hash, | ||
| modifiedFiles, deletedFiles []string, | ||
| metadataDir, metadataDirAbs string, | ||
| pipeline *filter.Pipeline, | ||
| ) (plumbing.Hash, error) { | ||
| // Get worktree root for resolving file paths | ||
| // This is critical because fileExists() and createBlobFromFile() use os.Stat() | ||
|
|
@@ -754,7 +762,7 @@ func (s *GitStore) buildTreeWithChanges( | |
|
|
||
| // Metadata directory files | ||
| if metadataDir != "" && metadataDirAbs != "" { | ||
| metaChanges, metaErr := addDirectoryToChanges(s.repo, metadataDirAbs, metadataDir) | ||
| metaChanges, metaErr := addDirectoryToChanges(s.repo, metadataDirAbs, metadataDir, pipeline) | ||
| if metaErr != nil { | ||
| return plumbing.ZeroHash, fmt.Errorf("failed to add metadata directory: %w", metaErr) | ||
| } | ||
|
|
@@ -899,7 +907,7 @@ func addDirectoryToEntriesWithAbsPath(repo *git.Repository, dirPathAbs, dirPathR | |
|
|
||
| // Use redacted blob creation for metadata files (transcripts, prompts, etc.) | ||
| // to ensure PII and secrets are redacted before writing to git. | ||
| blobHash, mode, err := createRedactedBlobFromFile(repo, path, treePath) | ||
| blobHash, mode, err := createRedactedBlobFromFile(repo, path, treePath, nil) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to create blob for %s: %w", path, err) | ||
| } | ||
|
|
@@ -925,7 +933,7 @@ type treeNode struct { | |
| // addDirectoryToChanges walks a filesystem directory and returns TreeChange entries | ||
| // for each file, suitable for use with ApplyTreeChanges. | ||
| // dirPathAbs is the absolute filesystem path; dirPathRel is the git tree-relative path. | ||
| func addDirectoryToChanges(repo *git.Repository, dirPathAbs, dirPathRel string) ([]TreeChange, error) { | ||
| func addDirectoryToChanges(repo *git.Repository, dirPathAbs, dirPathRel string, pipeline *filter.Pipeline) ([]TreeChange, error) { | ||
| var changes []TreeChange | ||
| err := filepath.Walk(dirPathAbs, func(path string, info os.FileInfo, err error) error { | ||
| if err != nil { | ||
|
|
@@ -958,7 +966,7 @@ func addDirectoryToChanges(repo *git.Repository, dirPathAbs, dirPathRel string) | |
|
|
||
| treePath := filepath.ToSlash(filepath.Join(dirPathRel, relWithinDir)) | ||
|
|
||
| blobHash, mode, blobErr := createRedactedBlobFromFile(repo, path, treePath) | ||
| blobHash, mode, blobErr := createRedactedBlobFromFile(repo, path, treePath, pipeline) | ||
| if blobErr != nil { | ||
| return fmt.Errorf("failed to create blob for %s: %w", path, blobErr) | ||
| } | ||
|
|
||


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
copyMetadataDir is documented as being used for “task checkpoints, subagent transcripts, etc.”, but it passes a nil filter pipeline into createRedactedBlobFromFile. This means any absolute paths inside those copied metadata files (e.g., task prompts, agent-*.jsonl) will bypass the new clean filter and be committed un-normalized, undermining the path/privacy filtering behavior elsewhere in the PR. Consider constructing a pipeline once (like in writeSessionToSubdirectory) and passing it through here, or selectively enabling filtering for transcript/prompt-like metadata files while keeping it disabled for files that must remain byte-identical.