From bd3106027d51a2af1188be979dd8e5b31dcabdd2 Mon Sep 17 00:00:00 2001
From: "minion[bot]"
Date: Fri, 6 Mar 2026 19:33:01 +0000
Subject: [PATCH] Exclude gitignored files from session metadata tracking

Paths are checked with `git check-ignore --no-index`. Without --no-index,
check-ignore never reports tracked paths, and the paths taken from a
commit's diff are tracked, so ignored-but-committed files (for example
force-added ones) would never be filtered out. A regression test covers
that case.

Automated by partio-io/minions (task: skip-gitignored-files-session-metadata)

Co-Authored-By: Claude
---
 internal/checkpoint/metadata.go |   7 +-
 internal/git/diff_files.go      |  25 +++++
 internal/git/gitignore.go       |  51 +++++++++++
 internal/git/gitignore_test.go  | 153 ++++++++++++++++++++++++++++++++
 internal/hooks/postcommit.go    |  14 +++-
 5 files changed, 246 insertions(+), 4 deletions(-)
 create mode 100644 internal/git/diff_files.go
 create mode 100644 internal/git/gitignore.go
 create mode 100644 internal/git/gitignore_test.go

diff --git a/internal/checkpoint/metadata.go b/internal/checkpoint/metadata.go
index 344d30e..3597397 100644
--- a/internal/checkpoint/metadata.go
+++ b/internal/checkpoint/metadata.go
@@ -2,7 +2,8 @@ package checkpoint
 
 // SessionMetadata is stored per-session within a checkpoint directory.
 type SessionMetadata struct {
-	Agent       string `json:"agent"`
-	TotalTokens int    `json:"total_tokens"`
-	Duration    string `json:"duration"`
+	Agent         string   `json:"agent"`
+	TotalTokens   int      `json:"total_tokens"`
+	Duration      string   `json:"duration"`
+	FilesModified []string `json:"files_modified,omitempty"`
 }
diff --git a/internal/git/diff_files.go b/internal/git/diff_files.go
new file mode 100644
index 0000000..8576dff
--- /dev/null
+++ b/internal/git/diff_files.go
@@ -0,0 +1,25 @@
+package git
+
+import "strings"
+
+// DiffFiles returns the list of file paths modified in a commit.
+func DiffFiles(commitHash string) ([]string, error) {
+	out, err := execGit("diff", "--name-only", commitHash+"~1", commitHash)
+	if err != nil {
+		// Retry against the empty tree for first commits (no parent).
+		out, err = execGit("diff", "--name-only", "4b825dc642cb6eb9a060e54bf899d69f82cf7ee2", commitHash)
+		if err != nil {
+			return nil, err
+		}
+	}
+	if out == "" {
+		return nil, nil
+	}
+	var files []string
+	for _, line := range strings.Split(out, "\n") {
+		if line != "" {
+			files = append(files, line)
+		}
+	}
+	return files, nil
+}
diff --git a/internal/git/gitignore.go b/internal/git/gitignore.go
new file mode 100644
index 0000000..1690cb0
--- /dev/null
+++ b/internal/git/gitignore.go
@@ -0,0 +1,51 @@
+package git
+
+import (
+	"errors"
+	"os/exec"
+	"strings"
+)
+
+// FilterGitIgnored returns the subset of paths that are NOT gitignored.
+// It uses git check-ignore which respects repo .gitignore files, nested
+// .gitignore files, and the global gitignore (core.excludesFile).
+// repoRoot must be the top-level directory of the git repository.
+//
+// Paths are matched against the ignore rules regardless of whether they are
+// tracked, so a force-added or previously committed file that matches a rule
+// is still filtered out. The relative order of the kept paths is preserved.
+func FilterGitIgnored(repoRoot string, paths []string) ([]string, error) {
+	if len(paths) == 0 {
+		return paths, nil
+	}
+
+	// --no-index is required: by default check-ignore never reports tracked
+	// files, and paths taken from a commit's diff are normally tracked.
+	cmd := exec.Command("git", "check-ignore", "--no-index", "--stdin")
+	cmd.Dir = repoRoot
+	cmd.Stdin = strings.NewReader(strings.Join(paths, "\n") + "\n")
+	out, err := cmd.Output()
+	if err != nil {
+		var exitErr *exec.ExitError
+		if errors.As(err, &exitErr) && exitErr.ExitCode() == 1 {
+			// Exit code 1 means no paths are ignored — return all paths unchanged.
+			return paths, nil
+		}
+		return nil, err
+	}
+
+	ignored := make(map[string]bool)
+	for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
+		if line != "" {
+			ignored[line] = true
+		}
+	}
+
+	result := make([]string, 0, len(paths))
+	for _, p := range paths {
+		if !ignored[p] {
+			result = append(result, p)
+		}
+	}
+	return result, nil
+}
diff --git a/internal/git/gitignore_test.go b/internal/git/gitignore_test.go
new file mode 100644
index 0000000..87f9418
--- /dev/null
+++ b/internal/git/gitignore_test.go
@@ -0,0 +1,153 @@
+package git
+
+import (
+	"os"
+	"os/exec"
+	"path/filepath"
+	"testing"
+)
+
+func TestFilterGitIgnored(t *testing.T) {
+	dir := t.TempDir()
+
+	if out, err := exec.Command("git", "-C", dir, "init").CombinedOutput(); err != nil {
+		t.Fatalf("git init: %v\n%s", err, out)
+	}
+
+	if err := os.WriteFile(filepath.Join(dir, ".gitignore"), []byte("*.log\n"), 0o644); err != nil {
+		t.Fatalf("writing .gitignore: %v", err)
+	}
+
+	paths := []string{"main.go", "debug.log", "server.go"}
+	got, err := FilterGitIgnored(dir, paths)
+	if err != nil {
+		t.Fatalf("FilterGitIgnored: %v", err)
+	}
+
+	// debug.log matches *.log and must be absent from checkpoint metadata.
+	for _, p := range got {
+		if p == "debug.log" {
+			t.Errorf("gitignored path %q should be absent from checkpoint metadata", p)
+		}
+	}
+
+	found := make(map[string]bool)
+	for _, p := range got {
+		found[p] = true
+	}
+	if !found["main.go"] {
+		t.Error("main.go should be present in checkpoint metadata")
+	}
+	if !found["server.go"] {
+		t.Error("server.go should be present in checkpoint metadata")
+	}
+}
+
+func TestFilterGitIgnoredNoneIgnored(t *testing.T) {
+	dir := t.TempDir()
+
+	if out, err := exec.Command("git", "-C", dir, "init").CombinedOutput(); err != nil {
+		t.Fatalf("git init: %v\n%s", err, out)
+	}
+
+	if err := os.WriteFile(filepath.Join(dir, ".gitignore"), []byte("*.log\n"), 0o644); err != nil {
+		t.Fatalf("writing .gitignore: %v", err)
+	}
+
+	paths := []string{"main.go", "server.go"}
+	got, err := FilterGitIgnored(dir, paths)
+	if err != nil {
+		t.Fatalf("FilterGitIgnored: %v", err)
+	}
+	if len(got) != 2 {
+		t.Errorf("expected all 2 paths returned, got %v", got)
+	}
+}
+
+func TestFilterGitIgnoredEmpty(t *testing.T) {
+	dir := t.TempDir()
+
+	if out, err := exec.Command("git", "-C", dir, "init").CombinedOutput(); err != nil {
+		t.Fatalf("git init: %v\n%s", err, out)
+	}
+
+	got, err := FilterGitIgnored(dir, nil)
+	if err != nil {
+		t.Fatalf("FilterGitIgnored with nil paths: %v", err)
+	}
+	if len(got) != 0 {
+		t.Errorf("expected empty result, got %v", got)
+	}
+}
+
+func TestFilterGitIgnoredNestedGitignore(t *testing.T) {
+	dir := t.TempDir()
+
+	if out, err := exec.Command("git", "-C", dir, "init").CombinedOutput(); err != nil {
+		t.Fatalf("git init: %v\n%s", err, out)
+	}
+
+	// Root .gitignore
+	if err := os.WriteFile(filepath.Join(dir, ".gitignore"), []byte("*.log\n"), 0o644); err != nil {
+		t.Fatalf("writing root .gitignore: %v", err)
+	}
+
+	// Nested .gitignore in subdir/
+	if err := os.MkdirAll(filepath.Join(dir, "subdir"), 0o755); err != nil {
+		t.Fatalf("mkdir subdir: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(dir, "subdir", ".gitignore"), []byte("*.tmp\n"), 0o644); err != nil {
+		t.Fatalf("writing nested .gitignore: %v", err)
+	}
+
+	paths := []string{"main.go", "debug.log", "subdir/cache.tmp", "subdir/code.go"}
+	got, err := FilterGitIgnored(dir, paths)
+	if err != nil {
+		t.Fatalf("FilterGitIgnored: %v", err)
+	}
+
+	for _, p := range got {
+		if p == "debug.log" || p == "subdir/cache.tmp" {
+			t.Errorf("gitignored path %q should be absent from checkpoint metadata", p)
+		}
+	}
+
+	found := make(map[string]bool)
+	for _, p := range got {
+		found[p] = true
+	}
+	if !found["main.go"] {
+		t.Error("main.go should be present in checkpoint metadata")
+	}
+	if !found["subdir/code.go"] {
+		t.Error("subdir/code.go should be present in checkpoint metadata")
+	}
+}
+
+func TestFilterGitIgnoredTrackedFile(t *testing.T) {
+	dir := t.TempDir()
+
+	if out, err := exec.Command("git", "-C", dir, "init").CombinedOutput(); err != nil {
+		t.Fatalf("git init: %v\n%s", err, out)
+	}
+
+	if err := os.WriteFile(filepath.Join(dir, ".gitignore"), []byte("*.log\n"), 0o644); err != nil {
+		t.Fatalf("writing .gitignore: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(dir, "debug.log"), []byte("x\n"), 0o644); err != nil {
+		t.Fatalf("writing debug.log: %v", err)
+	}
+
+	// Force-add the ignored file so it is tracked, as a committed file would be.
+	if out, err := exec.Command("git", "-C", dir, "add", "-f", "debug.log").CombinedOutput(); err != nil {
+		t.Fatalf("git add -f: %v\n%s", err, out)
+	}
+
+	got, err := FilterGitIgnored(dir, []string{"main.go", "debug.log"})
+	if err != nil {
+		t.Fatalf("FilterGitIgnored: %v", err)
+	}
+	if len(got) != 1 || got[0] != "main.go" {
+		t.Errorf("expected only main.go to remain, got %v", got)
+	}
+}
diff --git a/internal/hooks/postcommit.go b/internal/hooks/postcommit.go
index 0cd65b1..47dd27e 100644
--- a/internal/hooks/postcommit.go
+++ b/internal/hooks/postcommit.go
@@ -97,13 +97,25 @@ func runPostCommit(repoRoot string, cfg config.Config) error {
 		cp.PlanSlug = sessionData.PlanSlug
 	}
 
+	// Collect modified files, filtering out gitignored paths.
+	var filesModified []string
+	if modFiles, err := git.DiffFiles(commitHash); err == nil {
+		filtered, ferr := git.FilterGitIgnored(repoRoot, modFiles)
+		if ferr == nil {
+			filesModified = filtered
+		} else {
+			filesModified = modFiles
+		}
+	}
+
 	// Prepare session files
 	sessionFiles := &checkpoint.SessionFiles{
 		ContentHash: commitHash,
 		Context:     "",
 		FullJSONL:   "",
 		Metadata: checkpoint.SessionMetadata{
-			Agent: cfg.Agent,
+			Agent:         cfg.Agent,
+			FilesModified: filesModified,
 		},
 		Prompt: "",
 	}