From f38294e729109744c956b6c42ca1a63d313aa4ce Mon Sep 17 00:00:00 2001 From: Rich Hankins Date: Mon, 15 Dec 2025 22:11:05 +0000 Subject: [PATCH 1/8] Add github workflow --- .github/workflows/index.yml | 80 ++++++++ src/__init__.py | 24 +++ src/file_filter.py | 123 +++++++++++ src/github_client.py | 307 ++++++++++++++++++++++++++++ src/index_manager.py | 395 ++++++++++++++++++++++++++++++++++++ src/main.py | 167 +++++++++++++++ src/models.py | 131 ++++++++++++ src/search.py | 132 ++++++++++++ 8 files changed, 1359 insertions(+) create mode 100644 .github/workflows/index.yml create mode 100644 src/__init__.py create mode 100644 src/file_filter.py create mode 100644 src/github_client.py create mode 100644 src/index_manager.py create mode 100644 src/main.py create mode 100644 src/models.py create mode 100644 src/search.py diff --git a/.github/workflows/index.yml b/.github/workflows/index.yml new file mode 100644 index 0000000..43c349a --- /dev/null +++ b/.github/workflows/index.yml @@ -0,0 +1,80 @@ +name: Index Repository + +on: + push: + branches: + - main + - develop + - 'feature/**' # Index feature branches + - 'release/**' # Index release branches + workflow_dispatch: + inputs: + branch: + description: 'Branch to index (leave empty for current branch)' + required: false + type: string + force_full_reindex: + description: 'Force full re-index' + required: false + type: boolean + default: false + +jobs: + index: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Full history for comparison + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: 'pip' + + - name: Install dependencies + run: pip install -r requirements.txt + + - name: Restore index state + uses: actions/cache@v4 + with: + path: .augment-index-state + # Use branch-specific cache key + key: augment-index-${{ github.ref_name }}-${{ github.sha }} + restore-keys: | + augment-index-${{ 
github.ref_name }}- + + - name: Index repository + id: index + run: python src/main.py + env: + AUGMENT_API_TOKEN: ${{ secrets.AUGMENT_API_TOKEN }} + AUGMENT_API_URL: ${{ secrets.AUGMENT_API_URL }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + STORAGE_TYPE: file + # Branch-specific state path (automatically determined from GITHUB_REF) + # STATE_PATH is optional - defaults to .augment-index-state/{branch}/state.json + MAX_COMMITS: 100 + MAX_FILES: 500 + + - name: Print results + if: always() + run: | + echo "Success: ${{ steps.index.outputs.success }}" + echo "Type: ${{ steps.index.outputs.type }}" + echo "Files Indexed: ${{ steps.index.outputs.files_indexed }}" + echo "Files Deleted: ${{ steps.index.outputs.files_deleted }}" + echo "Checkpoint ID: ${{ steps.index.outputs.checkpoint_id }}" + echo "Commit SHA: ${{ steps.index.outputs.commit_sha }}" + + - name: Upload state artifact + if: success() + uses: actions/upload-artifact@v4 + with: + name: index-state + path: .augment-index-state/ + retention-days: 30 + diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..499dfe6 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,24 @@ +""" +GitHub Action Repository Indexer + +A Python example showing how to index a GitHub repository using the Augment SDK +Direct Mode with incremental updates. + +See README.md for usage instructions. +""" + +from .models import FileChange, IndexConfig, IndexResult, IndexState +from .file_filter import should_filter_file +from .github_client import GitHubClient +from .index_manager import IndexManager + +__all__ = [ + "FileChange", + "IndexConfig", + "IndexResult", + "IndexState", + "should_filter_file", + "GitHubClient", + "IndexManager", +] + diff --git a/src/file_filter.py b/src/file_filter.py new file mode 100644 index 0000000..88ab035 --- /dev/null +++ b/src/file_filter.py @@ -0,0 +1,123 @@ +""" +File filtering logic for GitHub repository indexing. 
+""" + +import re +from pathlib import Path +from typing import Optional + +# Keyish pattern regex - matches files that likely contain secrets/keys +KEYISH_PATTERN = re.compile( + r'^(\.git|.*\.pem|.*\.key|.*\.pfx|.*\.p12|.*\.jks|.*\.keystore|.*\.pkcs12|.*\.crt|.*\.cer|id_rsa|id_ed25519|id_ecdsa|id_dsa)$' +) + +# Default max file size in bytes (1 MB) +DEFAULT_MAX_FILE_SIZE = 1024 * 1024 # 1 MB + + +def always_ignore_path(path: str) -> bool: + """ + Check if a path should always be ignored (security measure). + + Args: + path: The file path to check. + + Returns: + True if the path contains ".." and should be ignored. + """ + return ".." in path + + +def is_keyish_path(path: str) -> bool: + """ + Check if a path matches the keyish pattern (secrets/keys). + + Args: + path: The file path to check. + + Returns: + True if the filename matches patterns for secret/key files. + """ + # Extract filename from path + filename = Path(path).name + return bool(KEYISH_PATTERN.match(filename)) + + +def is_valid_file_size(size_bytes: int, max_file_size: int = DEFAULT_MAX_FILE_SIZE) -> bool: + """ + Check if file size is valid for upload. + + Args: + size_bytes: The size of the file in bytes. + max_file_size: Maximum allowed file size in bytes. Defaults to 1 MB. + + Returns: + True if the file size is within the allowed limit. + """ + return size_bytes <= max_file_size + + +def is_valid_utf8(content: bytes) -> bool: + """ + Check if file content is valid UTF-8 (not binary). + + Args: + content: The file content as bytes. + + Returns: + True if the content is valid UTF-8, False if it's binary or invalid. + """ + try: + content.decode("utf-8") + return True + except UnicodeDecodeError: + return False + + +def should_filter_file( + path: str, + content: bytes, + max_file_size: Optional[int] = None, +) -> dict: + """ + Check if a file should be filtered out. + + Returns {"filtered": True, "reason": "..."} if file should be skipped. 
+ Returns {"filtered": False} if file should be included. + + Priority order (from file-filtering.md): + 1. Path validation (contains "..") + 2. File size check + 3. .augmentignore rules (checked by caller) + 4. Keyish patterns + 5. .gitignore rules (checked by caller) + 6. UTF-8 validation + + Args: + path: The file path to check. + content: The file content as bytes. + max_file_size: Maximum allowed file size in bytes. Defaults to DEFAULT_MAX_FILE_SIZE. + + Returns: + A dict with "filtered" (bool) and optionally "reason" (str) keys. + """ + effective_max_size = max_file_size if max_file_size is not None else DEFAULT_MAX_FILE_SIZE + + # 1. Check for ".." in path (security) + if always_ignore_path(path): + return {"filtered": True, "reason": "path_contains_dotdot"} + + # 2. Check file size + if not is_valid_file_size(len(content), effective_max_size): + return {"filtered": True, "reason": f"file_too_large ({len(content)} bytes)"} + + # 3. Check keyish patterns (secrets/keys) + if is_keyish_path(path): + return {"filtered": True, "reason": "keyish_pattern"} + + # 4. Check UTF-8 validity (binary detection) + if not is_valid_utf8(content): + return {"filtered": True, "reason": "binary_file"} + + return {"filtered": False} + diff --git a/src/github_client.py b/src/github_client.py new file mode 100644 index 0000000..f69bd62 --- /dev/null +++ b/src/github_client.py @@ -0,0 +1,307 @@ +""" +GitHub API client for fetching repository data. +""" + +import io +import tarfile + +import pathspec +import requests +from github import Github +from github.GithubException import GithubException + +from .file_filter import should_filter_file +from .models import FileChange + + +class GitHubClient: + """GitHub API client for fetching repository data.""" + + def __init__(self, token: str) -> None: + """ + Initialize the GitHub client with an authentication token. + + Args: + token: GitHub personal access token or GitHub App token. 
+ """ + self._github = Github(token) + self._token = token + + def resolve_ref(self, owner: str, repo: str, ref: str) -> str: + """ + Resolve a ref (like "HEAD", "main", or a commit SHA) to a commit SHA. + + Args: + owner: Repository owner. + repo: Repository name. + ref: Git ref to resolve. + + Returns: + The full 40-character commit SHA. + + Raises: + Exception: If the ref cannot be resolved. + """ + try: + repository = self._github.get_repo(f"{owner}/{repo}") + commit = repository.get_commit(ref) + return commit.sha + except GithubException as error: + raise Exception( + f'Failed to resolve ref "{ref}" for {owner}/{repo}: {error}' + ) from error + + def download_tarball(self, owner: str, repo: str, ref: str) -> dict[str, str]: + """ + Download repository as tarball and extract files. + + Args: + owner: Repository owner. + repo: Repository name. + ref: Git ref to download. + + Returns: + Dictionary mapping file paths to their contents. + """ + print(f"Downloading tarball for {owner}/{repo}@{ref}...") + + repository = self._github.get_repo(f"{owner}/{repo}") + tarball_url = repository.get_archive_link("tarball", ref) + + # Download tarball (10 minute timeout to handle large repositories) + # Include auth header for private repos + headers = {"Authorization": f"Bearer {self._token}"} + response = requests.get(tarball_url, headers=headers, stream=True, timeout=600) + if not response.ok: + raise Exception(f"Failed to download tarball: {response.reason}") + + # Load ignore patterns + augmentignore, gitignore = self._load_ignore_patterns(owner, repo, ref) + + # Track filtering statistics + files: dict[str, str] = {} + total_files = 0 + filtered_files = 0 + filter_reasons: dict[str, int] = {} + + # Extract files from tarball + tarball_data = io.BytesIO(response.content) + with tarfile.open(fileobj=tarball_data, mode="r:gz") as tar: + for member in tar.getmembers(): + # Skip directories and symlinks + if not member.isfile(): + continue + + total_files += 1 + + # Remove 
the root directory prefix (e.g., "owner-repo-sha/") + path_parts = member.name.split("/") + path_parts.pop(0) # Remove first component + file_path = "/".join(path_parts) + + if not file_path: + continue + + # Read file contents + file_obj = tar.extractfile(member) + if file_obj is None: + continue + content_bytes = file_obj.read() + + # Apply filtering in priority order: + # 1. .augmentignore + if augmentignore and augmentignore.match_file(file_path): + filtered_files += 1 + filter_reasons["augmentignore"] = filter_reasons.get("augmentignore", 0) + 1 + continue + + # 2. Path validation, file size, keyish patterns, UTF-8 validation + filter_result = should_filter_file(path=file_path, content=content_bytes) + + if filter_result["filtered"]: + filtered_files += 1 + reason = filter_result.get("reason", "unknown") + filter_reasons[reason] = filter_reasons.get(reason, 0) + 1 + continue + + # 3. .gitignore (checked last) + if gitignore and gitignore.match_file(file_path): + filtered_files += 1 + filter_reasons["gitignore"] = filter_reasons.get("gitignore", 0) + 1 + continue + + # File passed all filters + try: + contents = content_bytes.decode("utf-8") + files[file_path] = contents + except UnicodeDecodeError: + # This should not happen if is_valid_utf8() is working correctly + filtered_files += 1 + filter_reasons["decode_error"] = filter_reasons.get("decode_error", 0) + 1 + print(f"Warning: File {file_path} passed UTF-8 validation but failed to decode") + + print(f"Extracted {len(files)} files from tarball") + print(f"Filtered {filtered_files} of {total_files} files. Reasons: {filter_reasons}") + return files + + def compare_commits( + self, owner: str, repo: str, base: str, head: str + ) -> dict: + """ + Compare two commits and get file changes. 
+ """ + print(f"Comparing {base}...{head}...") + + repository = self._github.get_repo(f"{owner}/{repo}") + comparison = repository.compare(base, head) + + files: list[FileChange] = [] + + for file in comparison.files: + change = FileChange( + path=file.filename, + status=self._map_github_status(file.status), + previousFilename=file.previous_filename, + ) + + # Download file contents for added/modified files + if change.status in ("added", "modified"): + try: + contents = self.get_file_contents(owner, repo, file.filename, head) + change.contents = contents + except Exception as error: + print(f"Warning: Failed to download {file.filename}: {error}") + + files.append(change) + + return { + "files": files, + "commits": comparison.total_commits, + "totalChanges": len(comparison.files), + } + + def get_file_contents( + self, owner: str, repo: str, path: str, ref: str + ) -> str: + """ + Get file contents at a specific ref. + + Args: + owner: Repository owner. + repo: Repository name. + path: File path within the repository. + ref: Git ref to get contents at. + + Returns: + The file contents as a string. + + Raises: + Exception: If the path is not a file. + """ + repository = self._github.get_repo(f"{owner}/{repo}") + content = repository.get_contents(path, ref) + + if isinstance(content, list): + raise Exception(f"{path} is not a file") + + return content.decoded_content.decode("utf-8") + + def _load_ignore_patterns( + self, owner: str, repo: str, ref: str + ) -> tuple[pathspec.PathSpec | None, pathspec.PathSpec | None]: + """ + Load .gitignore and .augmentignore patterns separately. + + Returns both filters to maintain proper priority order: + .augmentignore → keyish → .gitignore + + Args: + owner: Repository owner. + repo: Repository name. + ref: Git ref to load patterns from. + + Returns: + Tuple of (augmentignore, gitignore) PathSpec objects, or None if not found. 
+ """ + augmentignore: pathspec.PathSpec | None = None + gitignore: pathspec.PathSpec | None = None + + # Try to load .gitignore + try: + gitignore_content = self.get_file_contents(owner, repo, ".gitignore", ref) + gitignore = pathspec.PathSpec.from_lines("gitwildmatch", gitignore_content.splitlines()) + except Exception: + # .gitignore doesn't exist + pass + + # Try to load .augmentignore + try: + augmentignore_content = self.get_file_contents(owner, repo, ".augmentignore", ref) + augmentignore = pathspec.PathSpec.from_lines("gitwildmatch", augmentignore_content.splitlines()) + except Exception: + # .augmentignore doesn't exist + pass + + return augmentignore, gitignore + + def _map_github_status(self, status: str) -> str: + """ + Map GitHub file status to our FileChange status. + + Args: + status: GitHub file status string. + + Returns: + Normalized status string. + """ + status_map = { + "added": "added", + "modified": "modified", + "removed": "removed", + "renamed": "renamed", + } + return status_map.get(status, "modified") + + def ignore_files_changed( + self, owner: str, repo: str, base: str, head: str + ) -> bool: + """ + Check if ignore files changed between commits. + + Args: + owner: Repository owner. + repo: Repository name. + base: Base commit SHA. + head: Head commit SHA. + + Returns: + True if .gitignore or .augmentignore changed, False otherwise. + """ + repository = self._github.get_repo(f"{owner}/{repo}") + comparison = repository.compare(base, head) + + ignore_files = [".gitignore", ".augmentignore"] + return any(file.filename in ignore_files for file in comparison.files) + + def is_force_push( + self, owner: str, repo: str, base: str, head: str + ) -> bool: + """ + Check if the push was a force push. + + Args: + owner: Repository owner. + repo: Repository name. + base: Base commit SHA. + head: Head commit SHA. + + Returns: + True if the push was a force push, False otherwise. 
+ """ + try: + repository = self._github.get_repo(f"{owner}/{repo}") + repository.compare(base, head) + return False + except GithubException: + # If comparison fails, it's likely a force push + return True diff --git a/src/index_manager.py b/src/index_manager.py new file mode 100644 index 0000000..c2bf48f --- /dev/null +++ b/src/index_manager.py @@ -0,0 +1,395 @@ +""" +Index Manager - Core indexing logic +""" + +import json +import tempfile +from pathlib import Path +from typing import Optional + +from auggie_sdk.context import DirectContext, File + +from .github_client import GitHubClient +from .models import FileChange, IndexConfig, IndexResult, IndexState, RepositoryInfo + +DEFAULT_MAX_COMMITS = 100 +DEFAULT_MAX_FILES = 500 + + +class IndexManager: + """Index Manager - Core indexing logic for GitHub repositories.""" + + def __init__( + self, context: DirectContext, config: IndexConfig, state_path: str + ) -> None: + """ + Initialize the IndexManager. + + Args: + context: DirectContext instance for indexing operations. + config: Configuration for the indexing operation. + state_path: Path to the state file for persistence. + """ + self._context = context + self._config = config + self._state_path = state_path + self._github = GitHubClient(config.githubToken) + + def resolve_commit_sha(self) -> None: + """ + Resolve the current commit ref to an actual commit SHA. + + This handles cases where GITHUB_SHA might be "HEAD" or a branch name. + Updates the config.currentCommit with the resolved SHA. + """ + resolved_sha = self._github.resolve_ref( + self._config.owner, self._config.repo, self._config.currentCommit + ) + self._config.currentCommit = resolved_sha + + def _load_state(self) -> Optional[IndexState]: + """ + Load index state from file system. 
+ + EXTENDING TO OTHER STORAGE BACKENDS: + Replace this method to load state from your preferred storage: + - Redis: Use redis-py client to GET the state JSON + - S3: Use boto3 to get_object from S3 bucket + - Database: Query your database for the state record + + Example for Redis: + import redis + r = redis.Redis.from_url(redis_url) + data = r.get(state_key) + return json.loads(data) if data else None + + Example for S3: + import boto3 + s3 = boto3.client('s3') + response = s3.get_object(Bucket=bucket, Key=key) + data = response['Body'].read().decode('utf-8') + return json.loads(data) + + Returns: + The loaded IndexState or None if the file doesn't exist. + """ + try: + with open(self._state_path, "r", encoding="utf-8") as f: + return json.load(f) + except FileNotFoundError: + return None + + def _save_state(self, state: IndexState) -> None: + """ + Save index state to file system. + + EXTENDING TO OTHER STORAGE BACKENDS: + Replace this method to save state to your preferred storage: + - Redis: Use redis-py client to SET the state JSON + - S3: Use boto3 to put_object to S3 bucket + - Database: Insert or update the state record in your database + + Example for Redis: + import redis + r = redis.Redis.from_url(redis_url) + r.set(state_key, json.dumps(state)) + + Example for S3: + import boto3 + s3 = boto3.client('s3') + s3.put_object( + Bucket=bucket, + Key=key, + Body=json.dumps(state), + ContentType='application/json' + ) + + Note: The state is just a JSON object (IndexState type) that can be + serialized and stored anywhere. For distributed systems, consider using + Redis or a database for shared state across multiple workers. + + Args: + state: The IndexState to save. + """ + # Ensure directory exists + Path(self._state_path).parent.mkdir(parents=True, exist_ok=True) + + # Write state to file + with open(self._state_path, "w", encoding="utf-8") as f: + json.dump(state, f, indent=2) + + def index(self) -> IndexResult: + """ + Main indexing entry point. 
+ + Returns: + IndexResult with success status and indexing details. + """ + print( + f"Starting index for {self._config.owner}/{self._config.repo}" + f"@{self._config.branch}" + ) + + try: + # Load previous state + previous_state = self._load_state() + + # If we have previous state, we'll need to create a new context with the imported state + # For now, we'll handle this in the incremental update logic + + # Determine if we need full re-index + should_reindex, reason = self._should_full_reindex(previous_state) + + if should_reindex: + return self._full_reindex(reason) + + # Perform incremental update + # previous_state is guaranteed to be non-null here + if not previous_state: + raise RuntimeError("previous_state should not be None at this point") + return self._incremental_update(previous_state) + except Exception as error: + print(f"Indexing failed: {error}") + return IndexResult( + success=False, + type="full", + filesIndexed=0, + filesDeleted=0, + checkpointId="", + commitSha=self._config.currentCommit, + error=str(error), + ) + + def _should_full_reindex( + self, previous_state: Optional[IndexState] + ) -> tuple[bool, Optional[str]]: + """ + Determine if full re-index is needed. + + Args: + previous_state: The previous index state, or None if first run. + + Returns: + Tuple of (should_reindex, reason). 
+ """ + # No previous state - first run + if not previous_state: + return (True, "first_run") + + # Different repository + if ( + previous_state["repository"]["owner"] != self._config.owner + or previous_state["repository"]["name"] != self._config.repo + ): + return (True, "different_repository") + + # Same commit - no changes + if previous_state["lastCommitSha"] == self._config.currentCommit: + print("No changes detected") + return (False, None) + + # Check for force push + is_force_push = self._github.is_force_push( + self._config.owner, + self._config.repo, + previous_state["lastCommitSha"], + self._config.currentCommit, + ) + + if is_force_push: + return (True, "force_push") + + # Get comparison + comparison = self._github.compare_commits( + self._config.owner, + self._config.repo, + previous_state["lastCommitSha"], + self._config.currentCommit, + ) + + # Too many commits + max_commits = self._config.maxCommits or DEFAULT_MAX_COMMITS + if comparison["commits"] > max_commits: + return ( + True, + f"too_many_commits ({comparison['commits']} > {max_commits})", + ) + + # Too many file changes + max_files = self._config.maxFiles or DEFAULT_MAX_FILES + if comparison["totalChanges"] > max_files: + return ( + True, + f"too_many_files ({comparison['totalChanges']} > {max_files})", + ) + + # Check if ignore files changed + ignore_changed = self._github.ignore_files_changed( + self._config.owner, + self._config.repo, + previous_state["lastCommitSha"], + self._config.currentCommit, + ) + + if ignore_changed: + return (True, "ignore_files_changed") + + return (False, None) + + def _full_reindex(self, reason: Optional[str]) -> IndexResult: + """ + Perform full repository re-index. + + Args: + reason: The reason for the full re-index. + + Returns: + IndexResult with the result of the full re-index. 
+ """ + print(f"Performing full re-index (reason: {reason or 'unknown'})") + + # Download entire repository as tarball + files = self._github.download_tarball( + self._config.owner, self._config.repo, self._config.currentCommit + ) + + # Add all files to index + files_to_index = [ + File(path=path, contents=contents) for path, contents in files.items() + ] + + print(f"Adding {len(files_to_index)} files to index...") + self._context.add_to_index(files_to_index) + + # Export DirectContext state + context_state = self._context.export() + context_state_dict = context_state.to_dict() + + new_state: IndexState = { + "contextState": context_state_dict, + "lastCommitSha": self._config.currentCommit, + "repository": RepositoryInfo( + owner=self._config.owner, + name=self._config.repo, + ), + } + + # Save state + self._save_state(new_state) + + return IndexResult( + success=True, + type="full", + filesIndexed=len(files_to_index), + filesDeleted=0, + checkpointId=context_state.checkpoint_id or "", + commitSha=self._config.currentCommit, + reindexReason=reason, + ) + + def _incremental_update(self, previous_state: IndexState) -> IndexResult: + """ + Perform incremental update. + + Args: + previous_state: The previous index state. + + Returns: + IndexResult with the result of the incremental update. 
+ """ + print("Performing incremental update...") + + # Create a temporary file with the previous context state + # Use delete=False because Windows can't reopen a NamedTemporaryFile while it's open + temp_file = tempfile.NamedTemporaryFile( + mode="w", suffix=".json", prefix="github-indexer-incremental-", delete=False + ) + temp_path = Path(temp_file.name) + try: + json.dump(previous_state["contextState"], temp_file, indent=2) + temp_file.close() # Close before reading on Windows + + # Create a new context from the previous state + self._context = DirectContext.import_from_file( + str(temp_path), + api_key=self._config.apiToken, + api_url=self._config.apiUrl, + ) + finally: + temp_path.unlink(missing_ok=True) + + # Get file changes + comparison = self._github.compare_commits( + self._config.owner, + self._config.repo, + previous_state["lastCommitSha"], + self._config.currentCommit, + ) + + # Process changes + files_to_add, files_to_delete = self._process_file_changes(comparison["files"]) + + print(f"Adding {len(files_to_add)} files, deleting {len(files_to_delete)} files") + + # Update index + if files_to_add: + self._context.add_to_index(files_to_add) + + if files_to_delete: + self._context.remove_from_index(files_to_delete) + + # Export DirectContext state + context_state = self._context.export() + context_state_dict = context_state.to_dict() + + new_state: IndexState = { + "contextState": context_state_dict, + "lastCommitSha": self._config.currentCommit, + "repository": previous_state["repository"], + } + + # Save state + self._save_state(new_state) + + return IndexResult( + success=True, + type="incremental", + filesIndexed=len(files_to_add), + filesDeleted=len(files_to_delete), + checkpointId=context_state.checkpoint_id or "", + commitSha=self._config.currentCommit, + ) + + def _process_file_changes( + self, changes: list[FileChange] + ) -> tuple[list[File], list[str]]: + """ + Process file changes and categorize them for indexing. 
+ + Args: + changes: List of file changes from the comparison. + + Returns: + Tuple of (files_to_add, files_to_delete). + """ + files_to_add: list[File] = [] + files_to_delete: list[str] = [] + + for change in changes: + if change.status in ("added", "modified"): + if change.contents: + files_to_add.append( + File(path=change.path, contents=change.contents) + ) + elif change.status == "removed": + files_to_delete.append(change.path) + elif change.status == "renamed": + if change.previousFilename: + files_to_delete.append(change.previousFilename) + if change.contents: + files_to_add.append( + File(path=change.path, contents=change.contents) + ) + + return files_to_add, files_to_delete + diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..fd10065 --- /dev/null +++ b/src/main.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +""" +Main entry point for GitHub Action Indexer + +Usage: + cd examples/python-sdk/context + python -m github_action_indexer index +""" + +import os +import re +import sys + +from auggie_sdk.context import DirectContext + +from .index_manager import IndexManager +from .models import IndexConfig + + +def get_api_credentials() -> tuple[str, str]: + """Get API credentials from environment variables.""" + api_token = os.environ.get("AUGMENT_API_TOKEN") + if not api_token: + raise ValueError("AUGMENT_API_TOKEN environment variable is required") + + api_url = os.environ.get("AUGMENT_API_URL") + if not api_url: + raise ValueError( + "AUGMENT_API_URL environment variable is required. Please set it to your " + "tenant-specific URL (e.g., 'https://your-tenant.api.augmentcode.com/')" + ) + + return api_token, api_url + + +def parse_repository_info() -> tuple[str, str, str, str]: + """ + Parse repository information from environment variables. + Returns (owner, repo, branch, current_commit). 
+ """ + repository = os.environ.get("GITHUB_REPOSITORY", "") + parts = repository.split("/") + + if len(parts) != 2 or not parts[0] or not parts[1]: + raise ValueError('GITHUB_REPOSITORY must be in format "owner/repo"') + + owner, repo = parts + + # Extract branch name from GitHub ref + github_ref = os.environ.get("GITHUB_REF", "") + github_ref_name = os.environ.get("GITHUB_REF_NAME", "") + + if github_ref.startswith("refs/heads/"): + branch = github_ref_name + elif github_ref.startswith("refs/tags/"): + branch = f"tag/{github_ref_name}" + elif github_ref_name: + branch = github_ref_name + else: + branch = os.environ.get("BRANCH", "main") + + current_commit = os.environ.get("GITHUB_SHA", "") + if not current_commit: + raise ValueError("GITHUB_SHA environment variable is required") + + return owner, repo, branch, current_commit + + +def load_config() -> IndexConfig: + """Load configuration from environment variables.""" + github_token = os.environ.get("GITHUB_TOKEN") + if not github_token: + raise ValueError("GITHUB_TOKEN environment variable is required") + + api_token, api_url = get_api_credentials() + owner, repo, branch, current_commit = parse_repository_info() + + max_commits = os.environ.get("MAX_COMMITS") + max_files = os.environ.get("MAX_FILES") + + return IndexConfig( + apiToken=api_token, + apiUrl=api_url, + githubToken=github_token, + owner=owner, + repo=repo, + branch=branch, + currentCommit=current_commit, + maxCommits=int(max_commits) if max_commits else None, + maxFiles=int(max_files) if max_files else None, + ) + + +def get_state_path(branch: str) -> str: + """Get the state file path for the current branch.""" + sanitized_branch = re.sub(r"[^a-zA-Z0-9\-_]", "-", branch) + return os.environ.get( + "STATE_PATH", f".augment-index-state/{sanitized_branch}/state.json" + ) + + +def main() -> None: + """Main function.""" + print("GitHub Action Indexer - Starting...") + + try: + # Load configuration + config = load_config() + state_path = 
get_state_path(config.branch) + + print(f"Repository: {config.owner}/{config.repo}") + print(f"Branch: {config.branch}") + print(f"Commit ref: {config.currentCommit}") + print(f"State path: {state_path}") + + # Create DirectContext + context = DirectContext.create(api_key=config.apiToken, api_url=config.apiUrl) + + # Create index manager and resolve commit SHA + manager = IndexManager(context, config, state_path) + manager.resolve_commit_sha() + + print(f"Resolved commit SHA: {config.currentCommit}") + + # Perform indexing + result = manager.index() + + # Print results + print("\n=== Indexing Results ===") + print(f"Success: {result.success}") + print(f"Type: {result.type}") + print(f"Files Indexed: {result.filesIndexed}") + print(f"Files Deleted: {result.filesDeleted}") + print(f"Checkpoint ID: {result.checkpointId}") + print(f"Commit SHA: {result.commitSha}") + + if result.reindexReason: + print(f"Re-index Reason: {result.reindexReason}") + + if result.error: + print(f"Error: {result.error}", file=sys.stderr) + sys.exit(1) + + # Set GitHub Actions output + github_output = os.environ.get("GITHUB_OUTPUT") + if github_output: + output_lines = [ + f"success={result.success}", + f"type={result.type}", + f"files_indexed={result.filesIndexed}", + f"files_deleted={result.filesDeleted}", + f"checkpoint_id={result.checkpointId}", + f"commit_sha={result.commitSha}", + ] + with open(github_output, "a") as f: + f.write("\n".join(output_lines) + "\n") + + print("\nIndexing completed successfully!") + + except Exception as error: + print(f"Fatal error: {error}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() + diff --git a/src/models.py b/src/models.py new file mode 100644 index 0000000..8b3dfc0 --- /dev/null +++ b/src/models.py @@ -0,0 +1,131 @@ +""" +Types for the GitHub Action Indexer + +This module defines the data types used by the GitHub Action Indexer +for tracking index state, file changes, configuration, and results. 
+""" + +from dataclasses import dataclass +from typing import Literal, Optional + +from typing_extensions import TypedDict + +from auggie_sdk.context.models import DirectContextState + + +class RepositoryInfo(TypedDict): + """Repository information for index state.""" + + owner: str # Repository owner + name: str # Repository name + + +class IndexState(TypedDict): + """ + Persistent state for the GitHub Action Indexer. + + This state is stored between indexing runs to enable incremental indexing. + """ + + contextState: DirectContextState + """DirectContext state (checkpoint, blobs, etc.)""" + + lastCommitSha: str + """Last indexed commit SHA (must be a full 40-character SHA, not a ref like 'HEAD')""" + + repository: RepositoryInfo + """Repository information - used to verify we're indexing the same repository""" + + +@dataclass +class FileChange: + """ + Represents a file change detected between commits. + + Used to track what files need to be indexed or removed from the index. + """ + + path: str + """File path""" + + status: Literal["added", "modified", "removed", "renamed"] + """Change status: added, modified, removed, renamed""" + + previousFilename: Optional[str] = None + """Previous filename (for renames)""" + + contents: Optional[str] = None + """File contents (for added/modified files)""" + + oldBlobName: Optional[str] = None + """Blob name from previous index (for modified/removed files)""" + + +@dataclass +class IndexConfig: + """ + Configuration for the GitHub Action Indexer. + + Contains all the settings needed to perform indexing of a GitHub repository. 
+ """ + + apiToken: str + """Augment API token""" + + apiUrl: str + """Augment API URL (provided via AUGMENT_API_URL env var)""" + + githubToken: str + """GitHub token""" + + owner: str + """Repository owner""" + + repo: str + """Repository name""" + + branch: str + """Branch to index""" + + currentCommit: str + """Current commit SHA""" + + maxCommits: Optional[int] = None + """Maximum commits before full re-index""" + + maxFiles: Optional[int] = None + """Maximum file changes before full re-index""" + + +@dataclass +class IndexResult: + """ + Result from an indexing operation. + + Contains information about what was indexed and whether it was successful. + """ + + success: bool + """Whether indexing was successful""" + + type: Literal["full", "incremental", "no-changes"] + """Type of indexing performed""" + + filesIndexed: int + """Number of files indexed""" + + filesDeleted: int + """Number of files deleted""" + + checkpointId: str + """New checkpoint ID""" + + commitSha: str + """Commit SHA that was indexed""" + + error: Optional[str] = None + """Error message if failed""" + + reindexReason: Optional[str] = None + """Reason for full re-index (if applicable)""" + diff --git a/src/search.py b/src/search.py new file mode 100644 index 0000000..fdac426 --- /dev/null +++ b/src/search.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +""" +CLI tool to search the indexed repository + +Usage: + cd examples/python-sdk/context + python -m github_action_indexer search "your search query" + python -m github_action_indexer search "your search query" --max-chars 5000 +""" + +import argparse +import json +import os +import re +import sys +import tempfile +from pathlib import Path +from typing import Optional + +from auggie_sdk.context import DirectContext + +from .models import IndexState + + +def get_state_path() -> str: + """Get the state file path for the current branch.""" + branch = os.environ.get("BRANCH", "main") + sanitized_branch = re.sub(r"[^a-zA-Z0-9\-_]", "-", branch) + 
return os.environ.get( + "STATE_PATH", f".augment-index-state/{sanitized_branch}/state.json" + ) + + +def load_state(state_path: str) -> Optional[IndexState]: + """Load index state from file system.""" + try: + with open(state_path, "r") as f: + data = f.read() + return json.loads(data) + except FileNotFoundError: + return None + + +def main() -> None: + """Main search function.""" + # Parse command line arguments + parser = argparse.ArgumentParser( + description="Search the indexed repository", + epilog='Example: python search.py "authentication functions"', + ) + parser.add_argument("query", help="Search query") + parser.add_argument( + "--max-chars", + type=int, + help="Maximum number of characters in output", + dest="max_chars", + ) + args = parser.parse_args() + + # Get API credentials + api_token = os.environ.get("AUGMENT_API_TOKEN") + if not api_token: + print("Error: AUGMENT_API_TOKEN environment variable is required", file=sys.stderr) + sys.exit(1) + + api_url = os.environ.get("AUGMENT_API_URL") + if not api_url: + print( + "Error: AUGMENT_API_URL environment variable is required. Please set it to your " + "tenant-specific URL (e.g., 'https://your-tenant.api.augmentcode.com/')", + file=sys.stderr, + ) + sys.exit(1) + + print(f'Searching for: "{args.query}"') + if args.max_chars is not None: + print(f"Limiting results to max {args.max_chars} characters\n") + else: + print() + + try: + # Load the index state first + state_path = get_state_path() + print(f"Loading index state from: {state_path}") + state = load_state(state_path) + + if not state: + print("Error: No index state found. 
Run indexing first.", file=sys.stderr) + print(" python -m github_action_indexer index", file=sys.stderr) + sys.exit(1) + + # Create a temporary file with the context state for import + # Use delete=False because Windows can't reopen a NamedTemporaryFile while it's open + temp_file = tempfile.NamedTemporaryFile( + mode="w", suffix=".json", prefix="github-indexer-state-", delete=False + ) + temp_path = Path(temp_file.name) + try: + json.dump(state["contextState"], temp_file, indent=2) + temp_file.close() # Close before reading on Windows + + # Import state using DirectContext.import_from_file + context = DirectContext.import_from_file( + str(temp_path), api_key=api_token, api_url=api_url + ) + finally: + temp_path.unlink(missing_ok=True) + + file_count = len(state["contextState"].get("blobs", [])) + + print(f"Loaded index: {file_count} files indexed") + print(f"Repository: {state['repository']['owner']}/{state['repository']['name']}") + print(f"Last indexed commit: {state['lastCommitSha']}\n") + + # Perform search with optional character limit + results = context.search(args.query, max_output_length=args.max_chars) + + if not results or results.strip() == "": + print("No results found.") + return + + print("Search results:\n") + print(results) + + except Exception as error: + print(f"Search failed: {error}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() + From d4564b840194d25c58612cdc8aa37da24269d5ad Mon Sep 17 00:00:00 2001 From: Rich Hankins Date: Mon, 15 Dec 2025 22:28:37 +0000 Subject: [PATCH 2/8] Add the workflow requiremens --- requirements.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..74d3b3a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,13 @@ +# GitHub Action Indexer dependencies +# Core SDK (from parent package) +# For running standalone, also install context_sdk from parent + +# GitHub API client 
+PyGithub>=2.1.0 + +# HTTP requests (for tarball download) +requests>=2.25.0 + +# Gitignore-style pattern matching +pathspec>=0.11.0 + From e23992122e7530b069ddbbe50f7f57d8ff1a34d7 Mon Sep 17 00:00:00 2001 From: Rich Hankins Date: Mon, 15 Dec 2025 22:32:19 +0000 Subject: [PATCH 3/8] Update requirements --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 74d3b3a..4d507b3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,8 @@ # GitHub Action Indexer dependencies # Core SDK (from parent package) # For running standalone, also install context_sdk from parent +# Augment SDK for indexing and search +auggie-sdk>=0.1.0 # GitHub API client PyGithub>=2.1.0 From 5bec0e1b5f9476ecac0168a0beb1ef4458991acc Mon Sep 17 00:00:00 2001 From: Rich Hankins Date: Mon, 15 Dec 2025 22:51:40 +0000 Subject: [PATCH 4/8] Retry workflow --- .github/workflows/index.yml | 2 +- {src => augment_indexer}/__init__.py | 0 {src => augment_indexer}/file_filter.py | 0 {src => augment_indexer}/github_client.py | 0 {src => augment_indexer}/index_manager.py | 0 {src => augment_indexer}/main.py | 0 {src => augment_indexer}/models.py | 0 {src => augment_indexer}/search.py | 0 8 files changed, 1 insertion(+), 1 deletion(-) rename {src => augment_indexer}/__init__.py (100%) rename {src => augment_indexer}/file_filter.py (100%) rename {src => augment_indexer}/github_client.py (100%) rename {src => augment_indexer}/index_manager.py (100%) rename {src => augment_indexer}/main.py (100%) rename {src => augment_indexer}/models.py (100%) rename {src => augment_indexer}/search.py (100%) diff --git a/.github/workflows/index.yml b/.github/workflows/index.yml index 43c349a..9d1811e 100644 --- a/.github/workflows/index.yml +++ b/.github/workflows/index.yml @@ -49,7 +49,7 @@ jobs: - name: Index repository id: index - run: python src/main.py + run: python -m augment_indexer.main env: AUGMENT_API_TOKEN: ${{ secrets.AUGMENT_API_TOKEN }} 
AUGMENT_API_URL: ${{ secrets.AUGMENT_API_URL }} diff --git a/src/__init__.py b/augment_indexer/__init__.py similarity index 100% rename from src/__init__.py rename to augment_indexer/__init__.py diff --git a/src/file_filter.py b/augment_indexer/file_filter.py similarity index 100% rename from src/file_filter.py rename to augment_indexer/file_filter.py diff --git a/src/github_client.py b/augment_indexer/github_client.py similarity index 100% rename from src/github_client.py rename to augment_indexer/github_client.py diff --git a/src/index_manager.py b/augment_indexer/index_manager.py similarity index 100% rename from src/index_manager.py rename to augment_indexer/index_manager.py diff --git a/src/main.py b/augment_indexer/main.py similarity index 100% rename from src/main.py rename to augment_indexer/main.py diff --git a/src/models.py b/augment_indexer/models.py similarity index 100% rename from src/models.py rename to augment_indexer/models.py diff --git a/src/search.py b/augment_indexer/search.py similarity index 100% rename from src/search.py rename to augment_indexer/search.py From 480fd137b85df040d8657eac2f86e8488027c32d Mon Sep 17 00:00:00 2001 From: Rich Hankins Date: Mon, 15 Dec 2025 23:22:56 +0000 Subject: [PATCH 5/8] debug --- augment_indexer/index_manager.py | 7 ++++++- augment_indexer/main.py | 5 ++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/augment_indexer/index_manager.py b/augment_indexer/index_manager.py index c2bf48f..445893a 100644 --- a/augment_indexer/index_manager.py +++ b/augment_indexer/index_manager.py @@ -112,11 +112,16 @@ def _save_state(self, state: IndexState) -> None: state: The IndexState to save. 
""" # Ensure directory exists - Path(self._state_path).parent.mkdir(parents=True, exist_ok=True) + # Path(self._state_path).parent.mkdir(parents=True, exist_ok=True) + state_dir = Path(self._state_path).parent + state_dir.mkdir(parents=True, exist_ok=True) + print(f"Saving state to {self._state_path}") # Write state to file with open(self._state_path, "w", encoding="utf-8") as f: json.dump(state, f, indent=2) + + print(f"State saved successfully ({Path(self._state_path).stat().st_size} bytes)") def index(self) -> IndexResult: """ diff --git a/augment_indexer/main.py b/augment_indexer/main.py index fd10065..ce0046e 100644 --- a/augment_indexer/main.py +++ b/augment_indexer/main.py @@ -102,6 +102,7 @@ def get_state_path(branch: str) -> str: def main() -> None: """Main function.""" print("GitHub Action Indexer - Starting...") + print(f"Current working directory: {os.getcwd()}") try: # Load configuration @@ -111,7 +112,9 @@ def main() -> None: print(f"Repository: {config.owner}/{config.repo}") print(f"Branch: {config.branch}") print(f"Commit ref: {config.currentCommit}") - print(f"State path: {state_path}") + # print(f"State path: {state_path}") + print(f"State path (relative): {state_path}") + print(f"State path (absolute): {os.path.abspath(state_path)}") # Create DirectContext context = DirectContext.create(api_key=config.apiToken, api_url=config.apiUrl) From 22149b7d8a487d270948d486735ad5457536ea56 Mon Sep 17 00:00:00 2001 From: Rich Hankins Date: Mon, 15 Dec 2025 23:29:44 +0000 Subject: [PATCH 6/8] include hidden files --- .github/workflows/index.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/index.yml b/.github/workflows/index.yml index 9d1811e..6a1fec0 100644 --- a/.github/workflows/index.yml +++ b/.github/workflows/index.yml @@ -77,4 +77,5 @@ jobs: name: index-state path: .augment-index-state/ retention-days: 30 + include-hidden-files: true From 3810f98a99ea7459102ffcf97e6b46499d92ead7 Mon Sep 17 00:00:00 2001 From: Rich Hankins Date: 
Tue, 16 Dec 2025 00:50:11 +0000 Subject: [PATCH 7/8] remove the augment_indexer --- .github/workflows/index.yml | 81 ------- augment_indexer/__init__.py | 24 -- augment_indexer/file_filter.py | 123 ---------- augment_indexer/github_client.py | 307 ------------------------ augment_indexer/index_manager.py | 400 ------------------------------- augment_indexer/main.py | 170 ------------- augment_indexer/models.py | 131 ---------- augment_indexer/search.py | 132 ---------- 8 files changed, 1368 deletions(-) delete mode 100644 .github/workflows/index.yml delete mode 100644 augment_indexer/__init__.py delete mode 100644 augment_indexer/file_filter.py delete mode 100644 augment_indexer/github_client.py delete mode 100644 augment_indexer/index_manager.py delete mode 100644 augment_indexer/main.py delete mode 100644 augment_indexer/models.py delete mode 100644 augment_indexer/search.py diff --git a/.github/workflows/index.yml b/.github/workflows/index.yml deleted file mode 100644 index 6a1fec0..0000000 --- a/.github/workflows/index.yml +++ /dev/null @@ -1,81 +0,0 @@ -name: Index Repository - -on: - push: - branches: - - main - - develop - - 'feature/**' # Index feature branches - - 'release/**' # Index release branches - workflow_dispatch: - inputs: - branch: - description: 'Branch to index (leave empty for current branch)' - required: false - type: string - force_full_reindex: - description: 'Force full re-index' - required: false - type: boolean - default: false - -jobs: - index: - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - fetch-depth: 0 # Full history for comparison - - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - cache: 'pip' - - - name: Install dependencies - run: pip install -r requirements.txt - - - name: Restore index state - uses: actions/cache@v4 - with: - path: .augment-index-state - # Use branch-specific cache key - key: augment-index-${{ github.ref_name }}-${{ 
github.sha }} - restore-keys: | - augment-index-${{ github.ref_name }}- - - - name: Index repository - id: index - run: python -m augment_indexer.main - env: - AUGMENT_API_TOKEN: ${{ secrets.AUGMENT_API_TOKEN }} - AUGMENT_API_URL: ${{ secrets.AUGMENT_API_URL }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - STORAGE_TYPE: file - # Branch-specific state path (automatically determined from GITHUB_REF) - # STATE_PATH is optional - defaults to .augment-index-state/{branch}/state.json - MAX_COMMITS: 100 - MAX_FILES: 500 - - - name: Print results - if: always() - run: | - echo "Success: ${{ steps.index.outputs.success }}" - echo "Type: ${{ steps.index.outputs.type }}" - echo "Files Indexed: ${{ steps.index.outputs.files_indexed }}" - echo "Files Deleted: ${{ steps.index.outputs.files_deleted }}" - echo "Checkpoint ID: ${{ steps.index.outputs.checkpoint_id }}" - echo "Commit SHA: ${{ steps.index.outputs.commit_sha }}" - - - name: Upload state artifact - if: success() - uses: actions/upload-artifact@v4 - with: - name: index-state - path: .augment-index-state/ - retention-days: 30 - include-hidden-files: true - diff --git a/augment_indexer/__init__.py b/augment_indexer/__init__.py deleted file mode 100644 index 499dfe6..0000000 --- a/augment_indexer/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -""" -GitHub Action Repository Indexer - -A Python example showing how to index a GitHub repository using the Augment SDK -Direct Mode with incremental updates. - -See README.md for usage instructions. 
-""" - -from .models import FileChange, IndexConfig, IndexResult, IndexState -from .file_filter import should_filter_file -from .github_client import GitHubClient -from .index_manager import IndexManager - -__all__ = [ - "FileChange", - "IndexConfig", - "IndexResult", - "IndexState", - "should_filter_file", - "GitHubClient", - "IndexManager", -] - diff --git a/augment_indexer/file_filter.py b/augment_indexer/file_filter.py deleted file mode 100644 index 88ab035..0000000 --- a/augment_indexer/file_filter.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -File filtering logic for GitHub repository indexing. -""" - -import re -from pathlib import Path -from typing import Optional - -# Keyish pattern regex - matches files that likely contain secrets/keys -KEYISH_PATTERN = re.compile( - r'^(\.git|.*\.pem|.*\.key|.*\.pfx|.*\.p12|.*\.jks|.*\.keystore|.*\.pkcs12|.*\.crt|.*\.cer|id_rsa|id_ed25519|id_ecdsa|id_dsa)$' -) - -# Default max file size in bytes (1 MB) -DEFAULT_MAX_FILE_SIZE = 1024 * 1024 # 1 MB - - -def always_ignore_path(path: str) -> bool: - """ - Check if a path should always be ignored (security measure). - - Args: - path: The file path to check. - - Returns: - True if the path contains ".." and should be ignored. - """ - return ".." in path - - -def is_keyish_path(path: str) -> bool: - """ - Check if a path matches the keyish pattern (secrets/keys). - - Args: - path: The file path to check. - - Returns: - True if the filename matches patterns for secret/key files. - """ - # Extract filename from path - filename = Path(path).name - return bool(KEYISH_PATTERN.match(filename)) - - -def is_valid_file_size(size_bytes: int, max_file_size: int = DEFAULT_MAX_FILE_SIZE) -> bool: - """ - Check if file size is valid for upload. - - Args: - size_bytes: The size of the file in bytes. - max_file_size: Maximum allowed file size in bytes. Defaults to 1 MB. - - Returns: - True if the file size is within the allowed limit. 
- """ - return size_bytes <= max_file_size - - -def is_valid_utf8(content: bytes) -> bool: - """ - Check if file content is valid UTF-8 (not binary). - - Args: - content: The file content as bytes. - - Returns: - True if the content is valid UTF-8, False if it's binary or invalid. - """ - try: - content.decode("utf-8") - return True - except UnicodeDecodeError: - return False - - -def should_filter_file( - path: str, - content: bytes, - max_file_size: Optional[int] = None, -) -> dict: - """ - Check if a file should be filtered out. - - Returns {"filtered": True, "reason": "..."} if file should be skipped. - Returns {"filtered": False} if file should be included. - - Priority order (from file-filtering.md): - 1. Path validation (contains "..") - 2. File size check - 3. .augmentignore rules (checked by caller) - 4. Keyish patterns - 5. .gitignore rules (checked by caller) - 6. UTF-8 validation - - Args: - path: The file path to check. - content: The file content as bytes. - max_file_size: Maximum allowed file size in bytes. Defaults to DEFAULT_MAX_FILE_SIZE. - - Returns: - A dict with "filtered" (bool) and optionally "reason" (str) keys. - """ - effective_max_size = max_file_size if max_file_size is not None else DEFAULT_MAX_FILE_SIZE - - # 1. Check for ".." in path (security) - if always_ignore_path(path): - return {"filtered": True, "reason": "path_contains_dotdot"} - - # 2. Check file size - if not is_valid_file_size(len(content), effective_max_size): - return {"filtered": True, "reason": f"file_too_large ({len(content)} bytes)"} - - # 3. Check keyish patterns (secrets/keys) - if is_keyish_path(path): - return {"filtered": True, "reason": "keyish_pattern"} - - # 4. 
Check UTF-8 validity (binary detection) - if not is_valid_utf8(content): - return {"filtered": True, "reason": "binary_file"} - - return {"filtered": False} - diff --git a/augment_indexer/github_client.py b/augment_indexer/github_client.py deleted file mode 100644 index f69bd62..0000000 --- a/augment_indexer/github_client.py +++ /dev/null @@ -1,307 +0,0 @@ -""" -GitHub API client for fetching repository data. -""" - -import io -import tarfile - -import pathspec -import requests -from github import Github -from github.GithubException import GithubException - -from .file_filter import should_filter_file -from .models import FileChange - - -class GitHubClient: - """GitHub API client for fetching repository data.""" - - def __init__(self, token: str) -> None: - """ - Initialize the GitHub client with an authentication token. - - Args: - token: GitHub personal access token or GitHub App token. - """ - self._github = Github(token) - self._token = token - - def resolve_ref(self, owner: str, repo: str, ref: str) -> str: - """ - Resolve a ref (like "HEAD", "main", or a commit SHA) to a commit SHA. - - Args: - owner: Repository owner. - repo: Repository name. - ref: Git ref to resolve. - - Returns: - The full 40-character commit SHA. - - Raises: - Exception: If the ref cannot be resolved. - """ - try: - repository = self._github.get_repo(f"{owner}/{repo}") - commit = repository.get_commit(ref) - return commit.sha - except GithubException as error: - raise Exception( - f'Failed to resolve ref "{ref}" for {owner}/{repo}: {error}' - ) from error - - def download_tarball(self, owner: str, repo: str, ref: str) -> dict[str, str]: - """ - Download repository as tarball and extract files. - - Args: - owner: Repository owner. - repo: Repository name. - ref: Git ref to download. - - Returns: - Dictionary mapping file paths to their contents. 
- """ - print(f"Downloading tarball for {owner}/{repo}@{ref}...") - - repository = self._github.get_repo(f"{owner}/{repo}") - tarball_url = repository.get_archive_link("tarball", ref) - - # Download tarball (10 minute timeout to handle large repositories) - # Include auth header for private repos - headers = {"Authorization": f"Bearer {self._token}"} - response = requests.get(tarball_url, headers=headers, stream=True, timeout=600) - if not response.ok: - raise Exception(f"Failed to download tarball: {response.reason}") - - # Load ignore patterns - augmentignore, gitignore = self._load_ignore_patterns(owner, repo, ref) - - # Track filtering statistics - files: dict[str, str] = {} - total_files = 0 - filtered_files = 0 - filter_reasons: dict[str, int] = {} - - # Extract files from tarball - tarball_data = io.BytesIO(response.content) - with tarfile.open(fileobj=tarball_data, mode="r:gz") as tar: - for member in tar.getmembers(): - # Skip directories and symlinks - if not member.isfile(): - continue - - total_files += 1 - - # Remove the root directory prefix (e.g., "owner-repo-sha/") - path_parts = member.name.split("/") - path_parts.pop(0) # Remove first component - file_path = "/".join(path_parts) - - if not file_path: - continue - - # Read file contents - file_obj = tar.extractfile(member) - if file_obj is None: - continue - content_bytes = file_obj.read() - - # Apply filtering in priority order: - # 1. .augmentignore - if augmentignore and augmentignore.match_file(file_path): - filtered_files += 1 - filter_reasons["augmentignore"] = filter_reasons.get("augmentignore", 0) + 1 - continue - - # 2. Path validation, file size, keyish patterns, UTF-8 validation - filter_result = should_filter_file(path=file_path, content=content_bytes) - - if filter_result["filtered"]: - filtered_files += 1 - reason = filter_result.get("reason", "unknown") - filter_reasons[reason] = filter_reasons.get(reason, 0) + 1 - continue - - # 3. 
.gitignore (checked last) - if gitignore and gitignore.match_file(file_path): - filtered_files += 1 - filter_reasons["gitignore"] = filter_reasons.get("gitignore", 0) + 1 - continue - - # File passed all filters - try: - contents = content_bytes.decode("utf-8") - files[file_path] = contents - except UnicodeDecodeError: - # This should not happen if is_valid_utf8() is working correctly - filtered_files += 1 - filter_reasons["decode_error"] = filter_reasons.get("decode_error", 0) + 1 - print(f"Warning: File {file_path} passed UTF-8 validation but failed to decode") - - print(f"Extracted {len(files)} files from tarball") - print(f"Filtered {filtered_files} of {total_files} files. Reasons: {filter_reasons}") - return files - - def compare_commits( - self, owner: str, repo: str, base: str, head: str - ) -> dict: - """ - Compare two commits and get file changes. - """ - print(f"Comparing {base}...{head}...") - - repository = self._github.get_repo(f"{owner}/{repo}") - comparison = repository.compare(base, head) - - files: list[FileChange] = [] - - for file in comparison.files: - change = FileChange( - path=file.filename, - status=self._map_github_status(file.status), - previousFilename=file.previous_filename, - ) - - # Download file contents for added/modified files - if change.status in ("added", "modified"): - try: - contents = self.get_file_contents(owner, repo, file.filename, head) - change.contents = contents - except Exception as error: - print(f"Warning: Failed to download {file.filename}: {error}") - - files.append(change) - - return { - "files": files, - "commits": comparison.total_commits, - "totalChanges": len(comparison.files), - } - - def get_file_contents( - self, owner: str, repo: str, path: str, ref: str - ) -> str: - """ - Get file contents at a specific ref. - - Args: - owner: Repository owner. - repo: Repository name. - path: File path within the repository. - ref: Git ref to get contents at. - - Returns: - The file contents as a string. 
- - Raises: - Exception: If the path is not a file. - """ - repository = self._github.get_repo(f"{owner}/{repo}") - content = repository.get_contents(path, ref) - - if isinstance(content, list): - raise Exception(f"{path} is not a file") - - return content.decoded_content.decode("utf-8") - - def _load_ignore_patterns( - self, owner: str, repo: str, ref: str - ) -> tuple[pathspec.PathSpec | None, pathspec.PathSpec | None]: - """ - Load .gitignore and .augmentignore patterns separately. - - Returns both filters to maintain proper priority order: - .augmentignore → keyish → .gitignore - - Args: - owner: Repository owner. - repo: Repository name. - ref: Git ref to load patterns from. - - Returns: - Tuple of (augmentignore, gitignore) PathSpec objects, or None if not found. - """ - augmentignore: pathspec.PathSpec | None = None - gitignore: pathspec.PathSpec | None = None - - # Try to load .gitignore - try: - gitignore_content = self.get_file_contents(owner, repo, ".gitignore", ref) - gitignore = pathspec.PathSpec.from_lines("gitwildmatch", gitignore_content.splitlines()) - except Exception: - # .gitignore doesn't exist - pass - - # Try to load .augmentignore - try: - augmentignore_content = self.get_file_contents(owner, repo, ".augmentignore", ref) - augmentignore = pathspec.PathSpec.from_lines("gitwildmatch", augmentignore_content.splitlines()) - except Exception: - # .augmentignore doesn't exist - pass - - return augmentignore, gitignore - - def _map_github_status(self, status: str) -> str: - """ - Map GitHub file status to our FileChange status. - - Args: - status: GitHub file status string. - - Returns: - Normalized status string. - """ - status_map = { - "added": "added", - "modified": "modified", - "removed": "removed", - "renamed": "renamed", - } - return status_map.get(status, "modified") - - def ignore_files_changed( - self, owner: str, repo: str, base: str, head: str - ) -> bool: - """ - Check if ignore files changed between commits. 
- - Args: - owner: Repository owner. - repo: Repository name. - base: Base commit SHA. - head: Head commit SHA. - - Returns: - True if .gitignore or .augmentignore changed, False otherwise. - """ - repository = self._github.get_repo(f"{owner}/{repo}") - comparison = repository.compare(base, head) - - ignore_files = [".gitignore", ".augmentignore"] - return any(file.filename in ignore_files for file in comparison.files) - - def is_force_push( - self, owner: str, repo: str, base: str, head: str - ) -> bool: - """ - Check if the push was a force push. - - Args: - owner: Repository owner. - repo: Repository name. - base: Base commit SHA. - head: Head commit SHA. - - Returns: - True if the push was a force push, False otherwise. - """ - try: - repository = self._github.get_repo(f"{owner}/{repo}") - repository.compare(base, head) - return False - except GithubException: - # If comparison fails, it's likely a force push - return True diff --git a/augment_indexer/index_manager.py b/augment_indexer/index_manager.py deleted file mode 100644 index 445893a..0000000 --- a/augment_indexer/index_manager.py +++ /dev/null @@ -1,400 +0,0 @@ -""" -Index Manager - Core indexing logic -""" - -import json -import tempfile -from pathlib import Path -from typing import Optional - -from auggie_sdk.context import DirectContext, File - -from .github_client import GitHubClient -from .models import FileChange, IndexConfig, IndexResult, IndexState, RepositoryInfo - -DEFAULT_MAX_COMMITS = 100 -DEFAULT_MAX_FILES = 500 - - -class IndexManager: - """Index Manager - Core indexing logic for GitHub repositories.""" - - def __init__( - self, context: DirectContext, config: IndexConfig, state_path: str - ) -> None: - """ - Initialize the IndexManager. - - Args: - context: DirectContext instance for indexing operations. - config: Configuration for the indexing operation. - state_path: Path to the state file for persistence. 
- """ - self._context = context - self._config = config - self._state_path = state_path - self._github = GitHubClient(config.githubToken) - - def resolve_commit_sha(self) -> None: - """ - Resolve the current commit ref to an actual commit SHA. - - This handles cases where GITHUB_SHA might be "HEAD" or a branch name. - Updates the config.currentCommit with the resolved SHA. - """ - resolved_sha = self._github.resolve_ref( - self._config.owner, self._config.repo, self._config.currentCommit - ) - self._config.currentCommit = resolved_sha - - def _load_state(self) -> Optional[IndexState]: - """ - Load index state from file system. - - EXTENDING TO OTHER STORAGE BACKENDS: - Replace this method to load state from your preferred storage: - - Redis: Use redis-py client to GET the state JSON - - S3: Use boto3 to get_object from S3 bucket - - Database: Query your database for the state record - - Example for Redis: - import redis - r = redis.Redis.from_url(redis_url) - data = r.get(state_key) - return json.loads(data) if data else None - - Example for S3: - import boto3 - s3 = boto3.client('s3') - response = s3.get_object(Bucket=bucket, Key=key) - data = response['Body'].read().decode('utf-8') - return json.loads(data) - - Returns: - The loaded IndexState or None if the file doesn't exist. - """ - try: - with open(self._state_path, "r", encoding="utf-8") as f: - return json.load(f) - except FileNotFoundError: - return None - - def _save_state(self, state: IndexState) -> None: - """ - Save index state to file system. 
- - EXTENDING TO OTHER STORAGE BACKENDS: - Replace this method to save state to your preferred storage: - - Redis: Use redis-py client to SET the state JSON - - S3: Use boto3 to put_object to S3 bucket - - Database: Insert or update the state record in your database - - Example for Redis: - import redis - r = redis.Redis.from_url(redis_url) - r.set(state_key, json.dumps(state)) - - Example for S3: - import boto3 - s3 = boto3.client('s3') - s3.put_object( - Bucket=bucket, - Key=key, - Body=json.dumps(state), - ContentType='application/json' - ) - - Note: The state is just a JSON object (IndexState type) that can be - serialized and stored anywhere. For distributed systems, consider using - Redis or a database for shared state across multiple workers. - - Args: - state: The IndexState to save. - """ - # Ensure directory exists - # Path(self._state_path).parent.mkdir(parents=True, exist_ok=True) - state_dir = Path(self._state_path).parent - state_dir.mkdir(parents=True, exist_ok=True) - print(f"Saving state to {self._state_path}") - - # Write state to file - with open(self._state_path, "w", encoding="utf-8") as f: - json.dump(state, f, indent=2) - - print(f"State saved successfully ({Path(self._state_path).stat().st_size} bytes)") - - def index(self) -> IndexResult: - """ - Main indexing entry point. - - Returns: - IndexResult with success status and indexing details. 
- """ - print( - f"Starting index for {self._config.owner}/{self._config.repo}" - f"@{self._config.branch}" - ) - - try: - # Load previous state - previous_state = self._load_state() - - # If we have previous state, we'll need to create a new context with the imported state - # For now, we'll handle this in the incremental update logic - - # Determine if we need full re-index - should_reindex, reason = self._should_full_reindex(previous_state) - - if should_reindex: - return self._full_reindex(reason) - - # Perform incremental update - # previous_state is guaranteed to be non-null here - if not previous_state: - raise RuntimeError("previous_state should not be None at this point") - return self._incremental_update(previous_state) - except Exception as error: - print(f"Indexing failed: {error}") - return IndexResult( - success=False, - type="full", - filesIndexed=0, - filesDeleted=0, - checkpointId="", - commitSha=self._config.currentCommit, - error=str(error), - ) - - def _should_full_reindex( - self, previous_state: Optional[IndexState] - ) -> tuple[bool, Optional[str]]: - """ - Determine if full re-index is needed. - - Args: - previous_state: The previous index state, or None if first run. - - Returns: - Tuple of (should_reindex, reason). 
- """ - # No previous state - first run - if not previous_state: - return (True, "first_run") - - # Different repository - if ( - previous_state["repository"]["owner"] != self._config.owner - or previous_state["repository"]["name"] != self._config.repo - ): - return (True, "different_repository") - - # Same commit - no changes - if previous_state["lastCommitSha"] == self._config.currentCommit: - print("No changes detected") - return (False, None) - - # Check for force push - is_force_push = self._github.is_force_push( - self._config.owner, - self._config.repo, - previous_state["lastCommitSha"], - self._config.currentCommit, - ) - - if is_force_push: - return (True, "force_push") - - # Get comparison - comparison = self._github.compare_commits( - self._config.owner, - self._config.repo, - previous_state["lastCommitSha"], - self._config.currentCommit, - ) - - # Too many commits - max_commits = self._config.maxCommits or DEFAULT_MAX_COMMITS - if comparison["commits"] > max_commits: - return ( - True, - f"too_many_commits ({comparison['commits']} > {max_commits})", - ) - - # Too many file changes - max_files = self._config.maxFiles or DEFAULT_MAX_FILES - if comparison["totalChanges"] > max_files: - return ( - True, - f"too_many_files ({comparison['totalChanges']} > {max_files})", - ) - - # Check if ignore files changed - ignore_changed = self._github.ignore_files_changed( - self._config.owner, - self._config.repo, - previous_state["lastCommitSha"], - self._config.currentCommit, - ) - - if ignore_changed: - return (True, "ignore_files_changed") - - return (False, None) - - def _full_reindex(self, reason: Optional[str]) -> IndexResult: - """ - Perform full repository re-index. - - Args: - reason: The reason for the full re-index. - - Returns: - IndexResult with the result of the full re-index. 
- """ - print(f"Performing full re-index (reason: {reason or 'unknown'})") - - # Download entire repository as tarball - files = self._github.download_tarball( - self._config.owner, self._config.repo, self._config.currentCommit - ) - - # Add all files to index - files_to_index = [ - File(path=path, contents=contents) for path, contents in files.items() - ] - - print(f"Adding {len(files_to_index)} files to index...") - self._context.add_to_index(files_to_index) - - # Export DirectContext state - context_state = self._context.export() - context_state_dict = context_state.to_dict() - - new_state: IndexState = { - "contextState": context_state_dict, - "lastCommitSha": self._config.currentCommit, - "repository": RepositoryInfo( - owner=self._config.owner, - name=self._config.repo, - ), - } - - # Save state - self._save_state(new_state) - - return IndexResult( - success=True, - type="full", - filesIndexed=len(files_to_index), - filesDeleted=0, - checkpointId=context_state.checkpoint_id or "", - commitSha=self._config.currentCommit, - reindexReason=reason, - ) - - def _incremental_update(self, previous_state: IndexState) -> IndexResult: - """ - Perform incremental update. - - Args: - previous_state: The previous index state. - - Returns: - IndexResult with the result of the incremental update. 
- """ - print("Performing incremental update...") - - # Create a temporary file with the previous context state - # Use delete=False because Windows can't reopen a NamedTemporaryFile while it's open - temp_file = tempfile.NamedTemporaryFile( - mode="w", suffix=".json", prefix="github-indexer-incremental-", delete=False - ) - temp_path = Path(temp_file.name) - try: - json.dump(previous_state["contextState"], temp_file, indent=2) - temp_file.close() # Close before reading on Windows - - # Create a new context from the previous state - self._context = DirectContext.import_from_file( - str(temp_path), - api_key=self._config.apiToken, - api_url=self._config.apiUrl, - ) - finally: - temp_path.unlink(missing_ok=True) - - # Get file changes - comparison = self._github.compare_commits( - self._config.owner, - self._config.repo, - previous_state["lastCommitSha"], - self._config.currentCommit, - ) - - # Process changes - files_to_add, files_to_delete = self._process_file_changes(comparison["files"]) - - print(f"Adding {len(files_to_add)} files, deleting {len(files_to_delete)} files") - - # Update index - if files_to_add: - self._context.add_to_index(files_to_add) - - if files_to_delete: - self._context.remove_from_index(files_to_delete) - - # Export DirectContext state - context_state = self._context.export() - context_state_dict = context_state.to_dict() - - new_state: IndexState = { - "contextState": context_state_dict, - "lastCommitSha": self._config.currentCommit, - "repository": previous_state["repository"], - } - - # Save state - self._save_state(new_state) - - return IndexResult( - success=True, - type="incremental", - filesIndexed=len(files_to_add), - filesDeleted=len(files_to_delete), - checkpointId=context_state.checkpoint_id or "", - commitSha=self._config.currentCommit, - ) - - def _process_file_changes( - self, changes: list[FileChange] - ) -> tuple[list[File], list[str]]: - """ - Process file changes and categorize them for indexing. 
- - Args: - changes: List of file changes from the comparison. - - Returns: - Tuple of (files_to_add, files_to_delete). - """ - files_to_add: list[File] = [] - files_to_delete: list[str] = [] - - for change in changes: - if change.status in ("added", "modified"): - if change.contents: - files_to_add.append( - File(path=change.path, contents=change.contents) - ) - elif change.status == "removed": - files_to_delete.append(change.path) - elif change.status == "renamed": - if change.previousFilename: - files_to_delete.append(change.previousFilename) - if change.contents: - files_to_add.append( - File(path=change.path, contents=change.contents) - ) - - return files_to_add, files_to_delete - diff --git a/augment_indexer/main.py b/augment_indexer/main.py deleted file mode 100644 index ce0046e..0000000 --- a/augment_indexer/main.py +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python3 -""" -Main entry point for GitHub Action Indexer - -Usage: - cd examples/python-sdk/context - python -m github_action_indexer index -""" - -import os -import re -import sys - -from auggie_sdk.context import DirectContext - -from .index_manager import IndexManager -from .models import IndexConfig - - -def get_api_credentials() -> tuple[str, str]: - """Get API credentials from environment variables.""" - api_token = os.environ.get("AUGMENT_API_TOKEN") - if not api_token: - raise ValueError("AUGMENT_API_TOKEN environment variable is required") - - api_url = os.environ.get("AUGMENT_API_URL") - if not api_url: - raise ValueError( - "AUGMENT_API_URL environment variable is required. Please set it to your " - "tenant-specific URL (e.g., 'https://your-tenant.api.augmentcode.com/')" - ) - - return api_token, api_url - - -def parse_repository_info() -> tuple[str, str, str, str]: - """ - Parse repository information from environment variables. - Returns (owner, repo, branch, current_commit). 
- """ - repository = os.environ.get("GITHUB_REPOSITORY", "") - parts = repository.split("/") - - if len(parts) != 2 or not parts[0] or not parts[1]: - raise ValueError('GITHUB_REPOSITORY must be in format "owner/repo"') - - owner, repo = parts - - # Extract branch name from GitHub ref - github_ref = os.environ.get("GITHUB_REF", "") - github_ref_name = os.environ.get("GITHUB_REF_NAME", "") - - if github_ref.startswith("refs/heads/"): - branch = github_ref_name - elif github_ref.startswith("refs/tags/"): - branch = f"tag/{github_ref_name}" - elif github_ref_name: - branch = github_ref_name - else: - branch = os.environ.get("BRANCH", "main") - - current_commit = os.environ.get("GITHUB_SHA", "") - if not current_commit: - raise ValueError("GITHUB_SHA environment variable is required") - - return owner, repo, branch, current_commit - - -def load_config() -> IndexConfig: - """Load configuration from environment variables.""" - github_token = os.environ.get("GITHUB_TOKEN") - if not github_token: - raise ValueError("GITHUB_TOKEN environment variable is required") - - api_token, api_url = get_api_credentials() - owner, repo, branch, current_commit = parse_repository_info() - - max_commits = os.environ.get("MAX_COMMITS") - max_files = os.environ.get("MAX_FILES") - - return IndexConfig( - apiToken=api_token, - apiUrl=api_url, - githubToken=github_token, - owner=owner, - repo=repo, - branch=branch, - currentCommit=current_commit, - maxCommits=int(max_commits) if max_commits else None, - maxFiles=int(max_files) if max_files else None, - ) - - -def get_state_path(branch: str) -> str: - """Get the state file path for the current branch.""" - sanitized_branch = re.sub(r"[^a-zA-Z0-9\-_]", "-", branch) - return os.environ.get( - "STATE_PATH", f".augment-index-state/{sanitized_branch}/state.json" - ) - - -def main() -> None: - """Main function.""" - print("GitHub Action Indexer - Starting...") - print(f"Current working directory: {os.getcwd()}") - - try: - # Load configuration - 
config = load_config() - state_path = get_state_path(config.branch) - - print(f"Repository: {config.owner}/{config.repo}") - print(f"Branch: {config.branch}") - print(f"Commit ref: {config.currentCommit}") - # print(f"State path: {state_path}") - print(f"State path (relative): {state_path}") - print(f"State path (absolute): {os.path.abspath(state_path)}") - - # Create DirectContext - context = DirectContext.create(api_key=config.apiToken, api_url=config.apiUrl) - - # Create index manager and resolve commit SHA - manager = IndexManager(context, config, state_path) - manager.resolve_commit_sha() - - print(f"Resolved commit SHA: {config.currentCommit}") - - # Perform indexing - result = manager.index() - - # Print results - print("\n=== Indexing Results ===") - print(f"Success: {result.success}") - print(f"Type: {result.type}") - print(f"Files Indexed: {result.filesIndexed}") - print(f"Files Deleted: {result.filesDeleted}") - print(f"Checkpoint ID: {result.checkpointId}") - print(f"Commit SHA: {result.commitSha}") - - if result.reindexReason: - print(f"Re-index Reason: {result.reindexReason}") - - if result.error: - print(f"Error: {result.error}", file=sys.stderr) - sys.exit(1) - - # Set GitHub Actions output - github_output = os.environ.get("GITHUB_OUTPUT") - if github_output: - output_lines = [ - f"success={result.success}", - f"type={result.type}", - f"files_indexed={result.filesIndexed}", - f"files_deleted={result.filesDeleted}", - f"checkpoint_id={result.checkpointId}", - f"commit_sha={result.commitSha}", - ] - with open(github_output, "a") as f: - f.write("\n".join(output_lines) + "\n") - - print("\nIndexing completed successfully!") - - except Exception as error: - print(f"Fatal error: {error}", file=sys.stderr) - sys.exit(1) - - -if __name__ == "__main__": - main() - diff --git a/augment_indexer/models.py b/augment_indexer/models.py deleted file mode 100644 index 8b3dfc0..0000000 --- a/augment_indexer/models.py +++ /dev/null @@ -1,131 +0,0 @@ -""" -Types for 
the GitHub Action Indexer - -This module defines the data types used by the GitHub Action Indexer -for tracking index state, file changes, configuration, and results. -""" - -from dataclasses import dataclass -from typing import Literal, Optional - -from typing_extensions import TypedDict - -from auggie_sdk.context.models import DirectContextState - - -class RepositoryInfo(TypedDict): - """Repository information for index state.""" - - owner: str # Repository owner - name: str # Repository name - - -class IndexState(TypedDict): - """ - Persistent state for the GitHub Action Indexer. - - This state is stored between indexing runs to enable incremental indexing. - """ - - contextState: DirectContextState - """DirectContext state (checkpoint, blobs, etc.)""" - - lastCommitSha: str - """Last indexed commit SHA (must be a full 40-character SHA, not a ref like 'HEAD')""" - - repository: RepositoryInfo - """Repository information - used to verify we're indexing the same repository""" - - -@dataclass -class FileChange: - """ - Represents a file change detected between commits. - - Used to track what files need to be indexed or removed from the index. - """ - - path: str - """File path""" - - status: Literal["added", "modified", "removed", "renamed"] - """Change status: added, modified, removed, renamed""" - - previousFilename: Optional[str] = None - """Previous filename (for renames)""" - - contents: Optional[str] = None - """File contents (for added/modified files)""" - - oldBlobName: Optional[str] = None - """Blob name from previous index (for modified/removed files)""" - - -@dataclass -class IndexConfig: - """ - Configuration for the GitHub Action Indexer. - - Contains all the settings needed to perform indexing of a GitHub repository. 
- """ - - apiToken: str - """Augment API token""" - - apiUrl: str - """Augment API URL (provided via AUGMENT_API_URL env var)""" - - githubToken: str - """GitHub token""" - - owner: str - """Repository owner""" - - repo: str - """Repository name""" - - branch: str - """Branch to index""" - - currentCommit: str - """Current commit SHA""" - - maxCommits: Optional[int] = None - """Maximum commits before full re-index""" - - maxFiles: Optional[int] = None - """Maximum file changes before full re-index""" - - -@dataclass -class IndexResult: - """ - Result from an indexing operation. - - Contains information about what was indexed and whether it was successful. - """ - - success: bool - """Whether indexing was successful""" - - type: Literal["full", "incremental", "no-changes"] - """Type of indexing performed""" - - filesIndexed: int - """Number of files indexed""" - - filesDeleted: int - """Number of files deleted""" - - checkpointId: str - """New checkpoint ID""" - - commitSha: str - """Commit SHA that was indexed""" - - error: Optional[str] = None - """Error message if failed""" - - reindexReason: Optional[str] = None - """Reason for full re-index (if applicable)""" - diff --git a/augment_indexer/search.py b/augment_indexer/search.py deleted file mode 100644 index fdac426..0000000 --- a/augment_indexer/search.py +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/bin/env python3 -""" -CLI tool to search the indexed repository - -Usage: - cd examples/python-sdk/context - python -m github_action_indexer search "your search query" - python -m github_action_indexer search "your search query" --max-chars 5000 -""" - -import argparse -import json -import os -import re -import sys -import tempfile -from pathlib import Path -from typing import Optional - -from auggie_sdk.context import DirectContext - -from .models import IndexState - - -def get_state_path() -> str: - """Get the state file path for the current branch.""" - branch = os.environ.get("BRANCH", "main") - sanitized_branch = 
re.sub(r"[^a-zA-Z0-9\-_]", "-", branch) - return os.environ.get( - "STATE_PATH", f".augment-index-state/{sanitized_branch}/state.json" - ) - - -def load_state(state_path: str) -> Optional[IndexState]: - """Load index state from file system.""" - try: - with open(state_path, "r") as f: - data = f.read() - return json.loads(data) - except FileNotFoundError: - return None - - -def main() -> None: - """Main search function.""" - # Parse command line arguments - parser = argparse.ArgumentParser( - description="Search the indexed repository", - epilog='Example: python search.py "authentication functions"', - ) - parser.add_argument("query", help="Search query") - parser.add_argument( - "--max-chars", - type=int, - help="Maximum number of characters in output", - dest="max_chars", - ) - args = parser.parse_args() - - # Get API credentials - api_token = os.environ.get("AUGMENT_API_TOKEN") - if not api_token: - print("Error: AUGMENT_API_TOKEN environment variable is required", file=sys.stderr) - sys.exit(1) - - api_url = os.environ.get("AUGMENT_API_URL") - if not api_url: - print( - "Error: AUGMENT_API_URL environment variable is required. Please set it to your " - "tenant-specific URL (e.g., 'https://your-tenant.api.augmentcode.com/')", - file=sys.stderr, - ) - sys.exit(1) - - print(f'Searching for: "{args.query}"') - if args.max_chars is not None: - print(f"Limiting results to max {args.max_chars} characters\n") - else: - print() - - try: - # Load the index state first - state_path = get_state_path() - print(f"Loading index state from: {state_path}") - state = load_state(state_path) - - if not state: - print("Error: No index state found. 
Run indexing first.", file=sys.stderr) - print(" python -m github_action_indexer index", file=sys.stderr) - sys.exit(1) - - # Create a temporary file with the context state for import - # Use delete=False because Windows can't reopen a NamedTemporaryFile while it's open - temp_file = tempfile.NamedTemporaryFile( - mode="w", suffix=".json", prefix="github-indexer-state-", delete=False - ) - temp_path = Path(temp_file.name) - try: - json.dump(state["contextState"], temp_file, indent=2) - temp_file.close() # Close before reading on Windows - - # Import state using DirectContext.import_from_file - context = DirectContext.import_from_file( - str(temp_path), api_key=api_token, api_url=api_url - ) - finally: - temp_path.unlink(missing_ok=True) - - file_count = len(state["contextState"].get("blobs", [])) - - print(f"Loaded index: {file_count} files indexed") - print(f"Repository: {state['repository']['owner']}/{state['repository']['name']}") - print(f"Last indexed commit: {state['lastCommitSha']}\n") - - # Perform search with optional character limit - results = context.search(args.query, max_output_length=args.max_chars) - - if not results or results.strip() == "": - print("No results found.") - return - - print("Search results:\n") - print(results) - - except Exception as error: - print(f"Search failed: {error}", file=sys.stderr) - sys.exit(1) - - -if __name__ == "__main__": - main() - From 3d5728008197dac308c6c8d2d8c9f95255815222 Mon Sep 17 00:00:00 2001 From: Rich Hankins Date: Tue, 16 Dec 2025 00:52:37 +0000 Subject: [PATCH 8/8] new indexer --- .github/workflows/augment-index.yml | 81 ++++ .gitignore | 3 + augment_indexer/__init__.py | 24 ++ .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 794 bytes .../__pycache__/file_filter.cpython-311.pyc | Bin 0 -> 4189 bytes .../__pycache__/github_client.cpython-311.pyc | Bin 0 -> 13124 bytes .../__pycache__/index_manager.cpython-311.pyc | Bin 0 -> 15403 bytes .../__pycache__/main.cpython-311.pyc | Bin 0 -> 7988 bytes 
.../__pycache__/models.cpython-311.pyc | Bin 0 -> 3886 bytes augment_indexer/file_filter.py | 123 ++++++ augment_indexer/github_client.py | 307 ++++++++++++++ augment_indexer/index_manager.py | 395 ++++++++++++++++++ augment_indexer/main.py | 167 ++++++++ augment_indexer/models.py | 131 ++++++ augment_indexer/requirements.txt | 14 + augment_indexer/search.py | 132 ++++++ 16 files changed, 1377 insertions(+) create mode 100644 .github/workflows/augment-index.yml create mode 100644 .gitignore create mode 100644 augment_indexer/__init__.py create mode 100644 augment_indexer/__pycache__/__init__.cpython-311.pyc create mode 100644 augment_indexer/__pycache__/file_filter.cpython-311.pyc create mode 100644 augment_indexer/__pycache__/github_client.cpython-311.pyc create mode 100644 augment_indexer/__pycache__/index_manager.cpython-311.pyc create mode 100644 augment_indexer/__pycache__/main.cpython-311.pyc create mode 100644 augment_indexer/__pycache__/models.cpython-311.pyc create mode 100644 augment_indexer/file_filter.py create mode 100644 augment_indexer/github_client.py create mode 100644 augment_indexer/index_manager.py create mode 100644 augment_indexer/main.py create mode 100644 augment_indexer/models.py create mode 100644 augment_indexer/requirements.txt create mode 100644 augment_indexer/search.py diff --git a/.github/workflows/augment-index.yml b/.github/workflows/augment-index.yml new file mode 100644 index 0000000..bdb544c --- /dev/null +++ b/.github/workflows/augment-index.yml @@ -0,0 +1,81 @@ +name: Index Repository + +on: + push: + branches: + - main + - develop + - 'feature/**' # Index feature branches + - 'release/**' # Index release branches + workflow_dispatch: + inputs: + branch: + description: 'Branch to index (leave empty for current branch)' + required: false + type: string + force_full_reindex: + description: 'Force full re-index' + required: false + type: boolean + default: false + +jobs: + index: + runs-on: ubuntu-latest + + steps: + - name: 
Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Full history for comparison + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: 'pip' + + - name: Install dependencies + run: pip install -r augment_indexer/requirements.txt + + - name: Restore index state + uses: actions/cache@v4 + with: + path: .augment-index-state + # Use branch-specific cache key + key: augment-index-${{ github.ref_name }}-${{ github.sha }} + restore-keys: | + augment-index-${{ github.ref_name }}- + + - name: Index repository + id: index + run: python -m augment_indexer.main + env: + AUGMENT_API_TOKEN: ${{ secrets.AUGMENT_API_TOKEN }} + AUGMENT_API_URL: ${{ secrets.AUGMENT_API_URL }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + STORAGE_TYPE: file + # Branch-specific state path (automatically determined from GITHUB_REF) + # STATE_PATH is optional - defaults to .augment-index-state/{branch}/state.json + MAX_COMMITS: 100 + MAX_FILES: 500 + + - name: Print results + if: always() + run: | + echo "Success: ${{ steps.index.outputs.success }}" + echo "Type: ${{ steps.index.outputs.type }}" + echo "Files Indexed: ${{ steps.index.outputs.files_indexed }}" + echo "Files Deleted: ${{ steps.index.outputs.files_deleted }}" + echo "Checkpoint ID: ${{ steps.index.outputs.checkpoint_id }}" + echo "Commit SHA: ${{ steps.index.outputs.commit_sha }}" + + - name: Upload state artifact + if: success() + uses: actions/upload-artifact@v4 + with: + name: index-state + path: .augment-index-state/ + retention-days: 30 + include-hidden-files: true + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bbb849d --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ + +# Augment indexer files +.augment-index-state/ diff --git a/augment_indexer/__init__.py b/augment_indexer/__init__.py new file mode 100644 index 0000000..499dfe6 --- /dev/null +++ b/augment_indexer/__init__.py @@ -0,0 +1,24 @@ +""" +GitHub Action Repository Indexer + +A Python 
example showing how to index a GitHub repository using the Augment SDK +Direct Mode with incremental updates. + +See README.md for usage instructions. +""" + +from .models import FileChange, IndexConfig, IndexResult, IndexState +from .file_filter import should_filter_file +from .github_client import GitHubClient +from .index_manager import IndexManager + +__all__ = [ + "FileChange", + "IndexConfig", + "IndexResult", + "IndexState", + "should_filter_file", + "GitHubClient", + "IndexManager", +] + diff --git a/augment_indexer/__pycache__/__init__.cpython-311.pyc b/augment_indexer/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..69d422f0967250d2148ebf671fb079d7a6d1230d GIT binary patch literal 794 zcmaJ-J&)5c7`D^2Y18Y#@drGCDnz?7AVhU7A3{B$cFSVrlDCN^JES+E7H%SxD1Z`j)+g)7iuIAQNMCdVXP#cC~qRe58Wy?$sY*J?m3d5Guci~iMNDP~oDX>dnawJGJ*mNvey-s5T)hG;?0BkXv zOy{$aO4+K^tghk=u(~q3cI{%>FN$Ls`c3#sNI1*mB7>&yT0bj`mBW;D*xB+vu@4zWo`dCWNzmr=OBzMoo zd8uF=@5el@bYksHz*8OP$1rz9pvN37VK^VHH+S9V*kCQ;g?@-ccAC{kgk6hLy7L0+ q?}&v{LT7YKrvE=TOLF|xL{%d(l~%PAyRKq8QS_;eSgsLKZL9_%R-B=<){;xi zE-i_sQMJgyEdo@B6jsm#)yYYbORojmqCgM56;z<2!WIDn^wb**H-M2t(f4M#q^Jso zi=w0DyxE!e-n@N(ebmzvBQSox^{R2NhmgPGi$A)Wi^so(#Y4i#B4G*(n98C8FlYvf z!Ns70d#b4xLyIAW1PE8|gnb4q^hhD(6Zm;!F~Y)&Q5NA{EXrdAm36(NF2)bM$Kbu2 zM+*vz^B&d>Kg0{Oo_9iv36|i=CS4NnQeA{SBWZEJn}>MM3I8Gjc+V7~ECqCF7MLfQ zboF9v)-X9O7^cI8QCg;^wQS^R!4mXahI6g5Lh0KXvo z9;11|c1n%hC7_2u_4hFfV`_!xSE*5;Iz=ATUa=}Bqq@1KZ`gE+Q)9UVQZhPX^L#}Z z&IT=V-L43p(PEOrq*%5mPsgHpyrxO;hLrLEO;%Lko&t4%N(zy; zN{()nY&xiEgH$guU#zw~n!K?Z(6nEWj3PqtbvAxC{qp7U6|2a{^~!RQmz;51)N zI$aeBU3JN_OgHiR%a&%8&#fL+Hkn-_7wGX^dX+Z%{*S~k5>4h?>rwoW5=pD@BNT(^`W_KOloo zN~d{2ub2*ajz!PWx!0a1ln?N+>6+oJfFu~={ynBqG#pPV5e4;t0gnL?y)bpyE)BMm zx@ap|^aGNHh)I|rDEmow|MrYSGYssc92DxHQIhKRY*P(YoWj_p z>})3Bh8fRWjJxTDl7Wk9`8p$nB_IhO2Q_+0*o(QImfe8EC2@e)8A@=gDXBGYz7I`r6ZeviCvdL8TFd-6v0;*xkV7hkYl= z`?2BRBV{<4N3;5)m4*KaI}mn(9|UTFQrPj&0p1Tf2Ph8bW)Wfm7G&xNVDygM9BH#S 
zoP}$_N8lWvv>e3l2Oc5kKT!OPu<(ceEq<2;7aS=~q$v%ipXN9$fsIl}Jv`RMVuk`~ zCa(VlMC$ZOy&2?Qv4^S(u?kfHg5Rn@NJ`{pnWpy!{lh_ULnzgwbWlJGV3pt<3bZzO zAB(pa?Jb7(s!=X;NLiS0j$|dB?qpI31b{Bvhw1`j5>Y_%y6w&^Q|GwSAPsH&wS;WKqpNqrv z%7Y|_ zM22FU;(6E2tlNu?TFy6ej#&;okgu>8kbaH5F}iFug?^fIEGuW~VwuyCYQ)Q<88^}H z53nP0<6c6PMMs>4kekhf-M%9~Q1*zjCb>cAaRhc3GT&uX*_6oCC<~jL=7j>!JH}m} z!#a%R+QzDc`uEai+UvoaU8#5dR{>)Vem4FK2D$Sh`P!xW3sc)?w$J?O`rCE2|BEn* zp4=YVjSTEX1~6+*EBmKuee|_@atfd56kv3p{V=~X{J!~tSs%T+8-Hal{tBXg$3xBj z#{S(mepUN*tv;G+MzB{)2t>{hsrEmK}g8;yb0UVx2$k0e5GNynJ!L%RiX#@d& h-ji;Ga5GGLPj64Xcf~`Al5nii@KT1s3=d|9F>OVnGgL?;q0S+wQNXlF>RwA^K8 zmU?Vv9l4mGz#MjO+@nnKeWtf?? zz|8QO__ai8mJ#^*w3x`Gg*yz#X4t6fx7fpEgw|}(^4h{xNcr^CiQ930K9fkNeoI3Y z$oGra(hEs0Hj_wZcmX3Ga$P4OK6Z^w&GK=b8a-?%|M;{Ynm?d;3Z{X=DFF6OyoED! zmKh6YeaA8l%))Ow{BvMFZ<%p$PRO@y$cGwGW@@XB0`*)_&rz+{0PWqR5BrRZ^FW!i zTIQ|m-^KZ$%vCMZ$8F%<*FBZaIZM_~QJZr%jNb_Tc)6xB-4@OdzkStyYxFb%J6^at?k!)lI1#cF99epUP7Xdu&+Ex4HSb0(F@B-msk%QKm4JX0pPEPl5@tQa=M zunSO@$|T|}$`nJ0jJm3QObD~$SoIfnOlpi(dYk73F`Z(Q3>%O0qNtZLAljAbnV+xp zSs`mOViu5L%^sU2vaQ)-q|=B=vxt0hM!>}^B3}&i?9iTT={bH6yD&QkWbP4#IHqS4 z{D8n-t7OL#DUQF*3w!hqV>*w~`8%2;24#s%ES3%G6zQKYTFDOJP+hn!c6d1o_iXE>OKx#!>l2QGJxq zowH>sYaR+XlVpZb&9$*Pi(~@|97YNx+uJ72nzOxSyG7rkZksPtx9EtCod!m#(r}&^ z)5)7W!-D#nVRj}HH+ZJ!^ofb%J-eBnIX01k7$kvZ;_11$M25L=dLjbCV95AFQ6m#R z#+@&QqC8 zLUm8p)*#%Jjg1Gue<~HPw}LLRIlS2<}p)PP?~7DVd=w zh(%=n?Xg4DKe{G%nm@9KCwk2v^;#iqSkq3Zg=_0m%C=E0!@9-QYr@qFs|nYX<$b$x z4bi9rsw@9F>$%#?mO21O-}G=+$!r7~IoPW?JB(JH=#{tE_%ZF8d z-T>{EbL5-}D(B+dxd!;>hO{TsT4_a5*WHygj9f(k#!++LOh=^_v_KgrkL2Y%kG#fN z8liGN$yY#O-t>F`IUiPYk5h~EmA}h1N{xlO8K@Rh#@Zr%FC;Z^zAVs~YmgeR`ztjI zxZE~9+=iLUHAzhcT$7s~&TZrtV5g~ovb*V#nv6Abe#sAXalmiOrpk~ZHEyWiEH&Sy z?@3Z~q3#RRS{gad1tf>?qU0!qYBg#p$#3ji=#kvz*>OvrLRAWDEkL@OfLD--8hC~l zP(ku=O$dta{VFq5t1tPbz$3r0el92lx#k)Q6%N#m8hp;E0WP?qmzL*ID78F~La7BQ zT%_-9lbphS$@vv$rMHg0=grD<$K)zwz6wYqP-w5sO)bT>8oxuXRce)7lD|x?09awP zRvWOwe$ZH7{-r|Cn+Gn0J=cR*u1)fTW^CW6+i2=C&ez{okcE@t8u8EgEods$&`a&3 
zM9VFcjzF*-4S$9)wq*{~xR6{%o?-Ox=Ftu9J0v&M>XaHF?2U{+iJtr^x= zkA98=7D=XA4z}mY7Rs;yfLY-x3z&gTaSVSugP?&ylT55b3!6dp9}DSlWg_ba-xBH^ zPs9=avtWPDvGen2k|NQ}Je#?OpdR5%0-9$wy7|Wwyl7AGbYW8xG=9vJ4BP}Gd#NNo-kMgVlsA?+l8m#_^S+if`Sq_W}%_e39o}Gi1i8(&K zkjaK?$f&|rWxz2;K(w5-2xGHZFeB_peERBj%`%sU&a6;2sd>saL}E6T7I-u^kUPy@ zW=eBFBLH|jhc+4uR*hy~NZm-m3~ElWEA(d1tP7c$f&H4BgocBLlxoaon&1nbU}dz*=Ai@m>D5GS79&f zgl-9GOxD7rXP9hLS*8FRBcb^Fkt(A;?fbJ*YJzC}ah2G2G;1$wcag;Kgi* zz<+ar7c-(}h6q*6c^`WO(t+E5ZO)#4n1| ztXb0YVAmo5o#W@Ol24Zq^%8{QnpK=nCNi3NK0U8luBQ_zt)aXy^hz+4jtSK7L=Aa( z`V$f^+SXe;9^~H5{qWoOzYSzIbrb{P^~P-vzo|4biw!V#XWx6TuXRLMJEBXX(lMlV z3@y5fjqMM-?|QLPx7ye(H+C1pT|YVfqtk_HCA?P+?_G4Rw=$LPpc5!P{G-E5qsq2H zb=%;gqu9JnZtjJlipL^@U+kR+u`=OPDvi-EW{WQre z26_st64^z@T1dHQu_^bK|N4LSA=W%L`1lZC@dUNUTIT0j;S5T7Ecxj4z3LxUL836*!F~0 z23}SNUVid*WndCU7*zuO{}KoTUx5yt+_tsuk=5>z<%H6GMD0GZk=(%}YlFvE2ai8~ zT|R$78JtoFr{qhoDTA*;X9tzQ9@3e;iKmNQyVtsgR=b9lPbppFYS;MU>Eez(izgmN zlt3T6pFeGlAn`n1AFkIp89BB#a%y$t)Dy4#+GS;AS{<2|ue_;@ya^+{qyz?^2HNEI z_|gFg^j8URYJii;3+a**J4cf;Mx&IJ%qT#*80q0BM5I0=VSWDd(%9E?D*rA?lLb5W z{?NmPzZiROO!k&v#c@z|9F!dgpE^9U_l<{92;g11p~p`q^;mISQ5{!g$CaWlkavqO zf-3k$&x!t*51N1FX+IOO{Az~@(~*~6-ev#w4l|^G-A`kB7fBzqynNXHPbL~d&2p6$ zLFeNx9mjpO@}Sp=*vjLu6B}KaU@N12Y?3S(zHi-xuD+U;Y~IpS2L7)?)vBxzgRx|m zEJi$;v&~Xq;o53e@^R|QhdD>Re@t@JS;#BwP$YQj3b56-w zYsXn{Qo<$l5`KZrTu+N-gQe~QB=3w>J$tErfadG0F+lTuxdsEywbk4*&LAw#5o2IeZ>lhI1ZldA_9uCXi0eB0?^%z3M5mg;g z*%AFJSOk5EJ63}`mMlx05*$*4LwVPFPhZ};9_Um9eI?50gsl^T{PCy3t|FkDE5+7r zYpv1M*632^(YKV=gKF!+V)u5n`^A#o(&GJug5Wa@@+V7PsPG&}AAHF4%rRf4} zM!joYBdc8_%lyjJY*V`U|j0(9lQ^$u{%# zmB7zClGJBQqJ~g}#*lBdZenveh4N7}dMrHz;zl+*6N+x zaKr*C$->UnoBpS&+NW#}Gi-)o8F8MEgXV~%!@kbo{^MEuXke60PE0a*rk8*-JUF+g zIVEbgI<8hA&2SWginO9ZfQve^xAYT`BN5OCb!|W)GhnKNzO8TkmCi#5(Fu{k0}+4ZD+lVp;#CMiHSJs6sjBbV1)tAl@Jpt(446_K$K(x$RhgCr`wWTrJ`^OyV-Oi zf;$8`79pCDSh<@{L4;YTl%kUeN;>Pml{z1@R~oe>W?h-Et?hp zu<9Sq+yA{GxH$WMMrr6%8$fN*&JnPS9%kh5kpAlRWcTp8uSNB}u;v?B^$je|D!x(G zH!3?u1@uB2jx*4%CM)=D5N5auLFeG6L(WP-2o!C8&~c9kh8v)VEmOC4uUFGs!!S4; 
zu@%7T-28k1Sv8ltaV_VpsYU=ab;k(+Xv_#sQkI(25Wv&sZXt)zZP`S({r9BX{y(7G z^5t|(7Rfq;_d=`-kLtC`S#%~wtDYu;bD{>H6}sbyJBAT?KoM+)Pu{6tbowDCb2SaR zK=&>X-yI&sX5b$)FMz8?fG7bDxXc|moCW7t?R0;`VE_7^cbOaforHLe!DNlHFS;G@ z$8uy6PivKZbdGIiyv7M?x~YYE&{NRA@%xj!JIHE;Wp*c{=;m)8rDx+yGq#qq} z;0AIL?-)rFWd+)8i-X!aw^dV>{)#%&f(8yN*&-S#I1~5~bS9vqJlk`M+(oKfgxJd6 zr$9aNKOg`F)j|F z*SK&P@bvVJt@e$rj6Cj>ProKlUs3wrRQukP-Q7>aeQJ1MExdmVKLxQ_yz=;UEt@^H^G*I1X?Rx#8N92-okp{h9E)<=!pOuiby;$bzC~7 zXbFk9S5-xqhacH4qv&ojm;MVXiHH$EZd<69j$){%*vi0La#~ve`a+O*mE2Tg`w!mw z{#y@c3fC3iKGnBRcI?x)OZ<|Rr}|a@9&)~G|G$L1C_l5N@7wF4Z{9uwR}tTF)Z5R; zH=INp?P=U%*s=O8=GZkpegk!z?%vlA5g6UHE1_VI32|*ktv-+TT;+gcP{Sf>){U;Pn%%Urv9NZ1LUaj^$(@p9k^?;_SdtQ!$ zZ$iVhPJx4RIPEe;_X;;4skzGsF1RwqvgF?-O8c2}e3nn~x95eIvcm@XuWu47f5hl` zG93qQUW!&*g@I@hY5e>*)Nf#A$o@%qm!}qo?;k3hm7DkK8TBDq3=wM$XA5y8(#)pr z5U0TAP0Hv+5RiT09SG_-p(DDBAg~qP+3qi8&53-ZkPp0C^i^Q>mcD>cpZvn8-1;IM z1Ni*)Os`C-P6Dg+V7<9@ad>eUF;xgKm57YIJ#v8hnQOvs{>Vl{7-`1q zvA99UVwx)!1IOnA99zaTcP#eJ1vXhOvBzRuIu6-p*zyIu4<#@VXci7G4G5R<_Z$WU z?piYlyMY8h#irQ4ke45^W3uE!MU|bBTgz=@^NYp+5Xt(Ib+s3--QFTZ08C!+04QV!Se> z$B&1fn4esD;`rrD`tL2mtf|N9Dh5L(GsaA>9`B0k@#p~(uRA>@E0mWgV{n=dSzjcL zF~-IiV`Gd*_LF$s=`LB}cZo6v!+qABq%FqS7UTW_60bYG`P-_GDGV#V9ZMlKGPW|M zL?&e4gz7w6vO>)gWem>Jjn=^>;2$EK28cEWea+TaXfh&6;E*JN^pXT}iwVLmCQ8(% zus>)Gkv=fSE-=O(FvbobChZ|EQO^lnH1i|&e5HsZ^({ueWko4Jb*k{ z$dvdq2*58((HOa6+rM4L<*-Xb+98{c(mmu!5C aRHtlwOSY>P8fIg$>YyS=KM#Sp? 
literal 0 HcmV?d00001 diff --git a/augment_indexer/__pycache__/index_manager.cpython-311.pyc b/augment_indexer/__pycache__/index_manager.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dee5c6b3a2237157a7d5c62a4328c3e578c6cee2 GIT binary patch literal 15403 zcmcgzU2GiJb)Nm@?sAq~{x2y}9{rJAkzAURY$voNn<7O~ibOdg70ZmB?TRxbS6c2e zGqaYsCQ~=a1JS6c90;|F*yy5lT^UkPJ>)?sn!rj5#0C1W%TB=U04f9odGJfSh8)1~ zL(%Wt*`1wPQm*sU42Ng#+@EuQ=X~d!d-orL!2pNrZ*IJp__u=`_bYm-T<&t<@pS~= z=0t9m6CI*6>6mq}r*qbcrz`1Na?iRQl;%!)mb|lG7WO3TmVC3mCI74+?_RMk8Ca^H zt!H6hl3xnW2A3LU82yQ ztBG_fo{Tv4;CMoo=G3utN|mmux_cs#lsGiB;6q`>hlwZzEN+zl5Mw_P9xGL!l=cVPel2Fs~%Bj?R`ePA{6Q6?p9JQ%(`O1OE z4M=&LlQ;}5N5eYn5}lG;bV(l3EqO(c=$-e8b>DN()`_j64^N-yk^EAiv%-m+cZz<* z*NXuu!1B8>fbLly`GQE}5pEEJ_A*FoltPH}TKQPQZm|L7n#4wX9>&uxhLF|_o~RFX zxWy*Cw}{PBtJGHI!Rlz0+N&{0e{y{-;L#y^G0AN@&qlgD#n~fYV#bB}v@95NC&-lv zNJvUlubV5+vljuHxqw3R6kEo^(7m#xX5`e=TxH4AsJhDkk1rwcHn-|nmG*%fS z4=YM-*;?FQx;nBW0)2Rbnr3}q@xnYlCB_PMD+xuF(5huyC#&dW{6A~zFh0a0iUXmCD1I<%Nxl7`}$g(ZC1 zLy9~{;e~`WD90Bo(J|IRNgiU)*peY*qRT628PXDJES7CMMPpcAo6$;iJ5}~H$eUcL zo$D0V-PeP~j&3~o;*S06Zp{~d*s(V^q3wIN(D9ts@f@YB&8Nz%g8c@7VB_nDWT#koTurJr-b;Nnu#YX@zax|mSsG%m{0_WS1~Cm!dzSdy}Bqt zpr1T7GkM{747r7+#KNK~T#|%6lP5;U_XzmX?bZ~IPfV4vd)*hT+cyTUz%@Ep55{;@Ubm2FCu8dAKwFc zoAYz3WocpZ%{K!4yDs|zd4^kasOI9~ILo4Q*k#?EQ*_gx9n2lvG`E)<#@Bh(dCm0- zch%wGUV#jR8sMfBwStal+dfW)it!3dxyl*2BcAxT(0Fqdo=L|=LpBLzNh7D13~{5Z zK+Y^#7Tk%8Gbg6TPfeW^X3h!cW+qRZ7p7;7dUi3w#yxS&Y*E)X1CUJ>S)jzRoR zo|v(S$jmUnDAG4n(Q1H>1uEeq~F&q@rvYgKCe@ zGaaTz8<+uH(}-a`GC?y!gGemVp>vCy{Z8HSy8KatUKh-J*+(+Kz#;Q zr$y-3Q_zXsP##ip7$2wZrkT;*RFU39s`6A?ok(X=;t5$!%N4=sn72v8`WWRma6KEg zi9t&Wj1o@y5eVepVYfg0u&M2asI~XpcHLXZHytZ99n+eQl{iOm--o*&cI>#hl52Wv z_2z2Hg%}oHC7)2PPl;6HwhMjs*=O5DSK~hWb)$ecf4EADEgd%w-#CnF{b7_>qG<7V zJ4Wlcziu6I{fI~4pWMgle(FNtfx~gE?tzm9>jI-~t_Q6SP~F28|D1{8;9!d95?jloTB$`ojpGU zx{v;9>lA%Y1OTK&{P8X*EanF~9fyXIAg!w`bRh)U%Kd*4?h4grNJiB2`-=AI-%7ME 
zXDUL41pOBa_EQi=ki+<*GfeBTAc!H+F=TeRUzHetED5&R;D)rW4s4&ftO(5LvWxF6$z^dquqEL*B=}K=Ox6x<%?DEorj9zZvMe3#jV~`J z6LZY+8ltkjHnx`0fFK2WlN2}BA{3Cz8ATQ10_g`rSpn#?$aWPKM6e3tP{E;fUy@AL zSSW@;DMB4L^b^2BJhj49?Wiz;MFccg<;10oN(u~9nv?-DDV2l>FH1&&>^J!_d^gl^ z!ZlJfm`(w;7Fb_yaC}ZqE6|$YA4!lpaWySpf!$|O#kQKTtgA+^tsP4X^ifu!X$DT5 zx@FXrQWZcG;Z`htMR#14`yhjoBW(swwBG+)qK?`6A zu)_AuSqza%UXFVo!8_(FELtvSVwD+Xh41#^TVamRfb$+c6C; zN~21ihw=r32fe;y_WZk4zXfM2`L@>2%%9e3I?SI_Yi_G?CVE!gKjhwZ+G}udDw(O} zi@dH3dHuGrq?uxuO7?2`TCHj8-N5_k^&PY4b50mv^=t0$z%(Mz9WQ2wV3fU{NM{t|a}|`ubS5bZsWfvZFijZJl391GrP(Id z7UDIJS|o8qn9$wxnPgHXd7^WXI=!ykI^9hISLaC+QCKlaEWeFQNeOaP=jRrsxhstG zDN*+u#{cwUT=&3GOv_V|2HmqPV}zKCLEeojTcOalZ?^PrwDjMeDzrSWwLG8apa1M( zBMg85x?+-td{e%8FeicFF7Q!}kLLO4!pJ|fz4O-o zw=UnjTx#^SHhjW?d`hIm`GQX-kk_A;JgAv0jeQu{`=id|ySWF??+5+a?!ee#*U$Qo zjSabe9&sT2^C1tY{B>m9-n!_4+$9IMf{h1~VxqI^tQTF?Fr)~07*|y`Utv;P*;!N0 z{e*J#1+mL0Cp$$CtJAV2V5`T(ge$D1++?*>#JXDfM>Yk@n`rO^dB1H>Z1tM$M8E#e zw!II0x?^J0niIHz?U{SWU%h#HF)g@ZC zl2+YUSYPBOv0miuX+Y*6A&=Pbu4f*5aW?*e3P*cV=IpPsq^!WY%&oH(hDN%Z{OgpdRZPsoy$L`80#)`m^}wI{eZP>%T!vJV|%5F z0Gx)0QW7K1i78?bzy{qMcHJ_{B5lc05oIRZtften&9Z{+9u_6^WnIEC;d4hs=hZaO z%h-EWvLQ1k16-kxwJNcY!tQU3Ma7J&#YnIQP8rh4r?6rS=YBAu#4N38MAc*vRGOd% zmg3ipPAPiJ_=$7CAG%>+Y?&j zp)wwCG*0FlCm*#8=Uaxct-yEP8q(?y+!pU9wdmwV{bas=l0jc@f$sy;Jl|Jr5pEns z*3RyC_|5Rajqt(BhDG_=T?CgGG`_$5-t&py(+OM$Sr1~TgYTsFf=_S0r5B9ayKcU^ z**3h)UQ5w0a@;{6>8=UmtxGYAuG& z=4S8twf!Uc&{^E~-!Pt_C69~1*aa}=xwg)mXSCKxzV+BI2am1$iBn3e-+$Y`Q9qoo zA7-4UawE4>+TNr2&=l?uBgPZd#tG=ZbF*_~qjTg-bf$|s(-l~6pi$|fQ8`#-Gx4Z# z=lUAk(~+k!*EVb?D}#{p@!tYHwpe41WnesHtn1wx8^%G>)Rdt)6M4YNypY4h;DusQTSY?No~4hW>iD?XPK(lo}UqY@3_{yutFQQG#L@A zTP$n!scJsaF9yD7e8u`z_vKI}KUNsOeP31`cY}7@61dus{m8`Cmn0c-89R3dq*h?Y zL{|&&ZN*dr+YBr$*RY_+# znHc-LZd9b*B_Z3)Jp0(ePaAqrG#bS&(r#+?MUW3%FshC7Jh1wqh0RjR%OErIixecm zC69stf{#;pjL0~V6GSG6oCMK>WwZ9Yp%XP(6oin731WLx@GM-4!}=%RO4U9$OlMC5 zJIjjhN@6pNbY7j0i(*Vo8;3}AuXK$zO7%L#h>1B>4ib+eL`ZVr41wt!h$X~|#z691 
z_muSkL#;762FpD+oEdtv?UmJ``%}_YBYCU#a0aZiT~D6e|37)%wD_u3J-uaK9Gr&o{ha+y&o@n(xKD@5Q2zzaHH5 z?b-0{$sH^B4r;!GdEY@p-w1Ao2R6b3g>Y00N7sYJ?gN|M2RFJ87P=2>-Eg#aAzSxQ zv3vJBuWH@TJ?!c$Lbrg5&{qV&zpAzM0IGMu76*ZwVMjx$u5LG$GswEXgb_i#;SFE- zR%gMtSM%-7`}Y32v8&h`E;e;M?A&?lTZPU6t#hE**;VZ9(K-+3J4fz2%T+vG>=Iz< zhXSzlL+CFEEPa2#IGABYTe8#`eqVznR*T=YHm_>pF%DsvTtugcy*tr!*IR3yez^l` zYc3oIv7noEmrty-SaVL%ch_&nbYSF{!gKg5E&kHWCaS7!=zsaD#b2O`*{aJXs@h`d z8)Fx|m>8StnI;_5Gm&rAm20fzvp&W8yL{~kr*`ax9`NP!S3IL=m5GQsOHZry+1hQ| z<7j7Y$~x$6f-u zRmhfJqe&pW>D`(#SR)>}3q3VwBxmtmKe;ZH!|^OzIXZ-}<=U#v*veEpZz)-$kJYW# z<>(;Tma8V8Z`Eg;F>^LBW6jly#1?2@@WtmWe)lTtRlj{q@ojvynhH-$t8Ij9M$?v7 zGy1D_m%A&RpqF7s!}0~0ZN1z6J~V_o_%|--5gplIY|+XrF6Hu;vqhEKs*hFt83r6Z z*~ejF^1JF|+qJB0I15?9qLgiuF&dShLASG>C~e1PcNoV>4R4Ml<7xWQ2Pf!|=@QQA z=w2l=KcBd!d(lbu6q(0gR>4UoT}XZogmx+8%ZV5c0~rAvfttY?^T1LZMfX!z8Pjr*lwx_FSQ0%}a$q;Ot@BprJG%<4eOhZDbP#8~KWgj(r*hxY+7I7LeB{XU zrwjaPjX#Y*k#GCUbANuWz;|nWcb@MChwcN~&XI?qmK&$uYPi`@4DWo0*TRG4XMd^A z-3^5YPacsm-szgHIc#1zT+Ex z$3GnTXh9qMMxpOzt?%W6Z~r=fBmHY%FyC+yI^mf@IHH9kc)|UXu@4&yM_$s7ykxwC z7JRR0zE|?TSI`okozQxQG}E)2e9s2olUptDhc*6iobt#D>v|UBNg5*vW(%8LPu3WBk;eFHq4R*&d4PhkLg%2?Ik=7HSPIsW z+6eL~5$L1-fI`~OPe!9-PVVQ<@K~1%E>*+tYV!q=1Ci;Hx3Lt-qFi(0-?i!(=U)4W zd=u3j0@3k0Bo|_k)41d)G&xhJti;mOrbW6fo*+x<+H+>L2zPzdR=cab$Fl>v*0yyb zH}qlS+QMtW#>-vi^$o6hTpU$*wT0WZ z_*~s>FeNGE>VSb-BxMb`YTI~tjRSe#05ezNk0I4@DiWq&#n70?VtODJ!(3*P6y{^GZ{r6a z<&?TuOia(kVoZsU2~G{YffQ4DKN0${Ws+#@yn_YRXAq}*RNBtc-IvnoB-;Uz$&n!w z2IJr{7O_mmqp>7tIzQkH&p7??LM9uG5i+He?_(CY4&{N50*TL$Nbue5bzwt=D&@EcX`6MF6J(4-NPjg;ac*dW54G>4#dz1jHPR!kAEM9VHPV{MTEqX!KIK4u)_H-0hZC8a&77z>51K=LCPOOAQv=IB)- z!WPU26r!KZFd^}|?})LqkIDatI06LaS0Ff<;&7ClP6sBFh>vsdZ`RXak!yUD{S~=z z-hLOk9eMlxh&!0y?taAe=IwWpo5_E<`%!ymo@*(#cI3IHlJ}~|fl1h&e!}8EeL^Z5 Gg#QH}YfxeU literal 0 HcmV?d00001 diff --git a/augment_indexer/__pycache__/main.cpython-311.pyc b/augment_indexer/__pycache__/main.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..c31b228160789f30e9711103489cc0978b188151 GIT binary patch literal 7988 zcmcIJ?@t>^mfd!{-TsMf0)Y&WPJqdnA2xvmG6dLVGLR4m$pDEi?Ce;My8#npbKULy z@R*HfG#}XA=$46CVa;YW{&32&N>0j$J8A!fyWD-~j#?V4C8S6vKFl|dW=4vl`*5$S z{i{uOGCAqW<*HXzuU=KXSM}=E>%X{Mb^^lJTfdC<9wLbUMuk$Ki@^8K0N5coVv*oT z&J-gTNt~M&O*l6%njtsGs5Q%?g(NtNv&85%>!KCIbc|WEE!qIK@^;?AJC;n%r4fiF z6UTf(EjmkWxScYz+o0VA?KZ%*1MKF229zC36z7C;9aqP>AouWI&VAE_NDEFA!FwJ7 z^te0{jBlF9f4?X7nf+2Y8fW>qB-~}!6VbTDE+qu^d{nxSyupq~q-Y|}PQ^L?4lmg4 zSH)8fu+*@Jr!1j4VLefy;oj6kXP6ftQj(JW^g8 z0OsPqC@ZO|Z57ePA&0qBDwT4Md;}Pe%UByknh@(GYpI<8t{E{tC*O*`+1Pu7Op`VA zDI><`WSU5mOCT@5B^5l+OYHcSDK;YT97rr0j*0A&kXQrp-HHl{_!?BQx57d+d;`Q1 z(7fYU&tICDT?j#^(8A^EiP=>DfvVSbW}_l2@c)7sdyua0DC3I zhee(hVFgi%l@jdTL{eZSJ|2!sz2Z6_i7rJWED&ek;{(fq9`?kFB(00X!5~8H)e!^X z^=KfRTt@4RBse}0Nvs7=`2Cb-N{E`Z$OX;3%uAa6S~!;ECj=oO_)VG(dWNLLO+Kz! z5ttNWBH98h`|4D1C9%c_i)4eM5JB*Al~)#_o45TaBi3&95z zL3I#f0+I}|@9}Nhcia!$^3i_9GoX3~GE;fSp#ni#E|YuiCb{{N;+|FAvobTg@AlkZ z{X_hBak(|1xPz)Ym^q(!_zDDNu>)$sM7pk$-v8#*prepO)D8shz^0aL6{K{{k>aSEn2P90 zlbofZ-UJ59bfpZ(!cn2ZSq~fw@?)6WhH1n6R_kI8tc!Z9{XWH^7;?E}N)tR;w`?GH~Ul-@Dkt6+9{p${_*P6E0$i`!w zH>fOA+MK4-)+GwmmN%8em%){=0Ow!e*Arq?O2B0kjl)H=7RDFMPbe|mc@C?2k$o?5 zJI)I|m`o3QLkPzsD?Mx^DF~nuLZA)SqLM#=yJ+6?QwtZaeh`|QxN>=ZYT@$Sb&YsM z2ERpHTEGc9AgR6sXzW^2l-L_Q%n;p$Fh%w_9yf?)e_XSddrWF|6*x3IerY1*5cnlA zxWb1yF_^OBf)ri`n1U>*(I3o>&z`%WQAm%JDa~IPPjwe50sKaF>bDB$a0pJ6xFFFi z;(9D9Y33Lo7u=}AE=plR5^uxB>t{5WNQv*7WgRtW7Q7^lLM@uTctOK_wYmx%iig*D zjn?_Bxs7}l85jpy0w@SxG-v~m7Wq(RFi@MoK|i#0z+W7In>9oHXeWG!wvX(zJ!q5J zfv0nd_l)X2lew68Hh$x5dG2h<&MVGis`FT8V&7`tJpEbfi`3RfpWn;e%ksJLhbx(T zyZqDfCy-+uI<2^8RQHU`%)Ia&{%krkv3cr8x;#S8xqCzLo>aXjGZ*(7o3>YX;t%3- z=iuK=O5<6z@oeVu3#<1VYs+(MOZL}_wOh4z7YM7RKksEX=}kIstN+H<_T1K%O)It@ z)z(uWEU3|+t&_ceh&oae=;LyqeQ&+Kx*W6nu0HlAz29(ndvWKJ2cP6-o*q{kPOA;4 zGqZU|Gr9~e=$FBy;y$ms&&$mD7Y@%>N0!dHWk*1!0zy47&!F0o%&}_!3OM?XY+c0lX8=(B&O7+xUE7rH_U1LXWFcRWfF{F`-B0xz| zX$w+RLY-7sY9okr&1DmZiz?03;4nrmBAh4mBjD5HGOs}SSyO?TM3`g2Dh6{kDZEkJ 
zY0jb=(5e3t4g66n1{tuyZmoHtKTA0^ts4zJ-GciBww`mO6Dht^Dp#i!o#59L~ z&T*yC4$BwN85K@IqPdDk{T$Y)ntd&NM+ZfX0eCVB=B1yhQ9@YpU>gXZcmdstni*ac zFub#Z zwRwB5>laGbxY{+YP$!T+*=o$T?Sszh+6g=eqq`Lu=S}@->%lT%k}Mlqw?)NvUl*0^;_4! zY};81_+83a8=3ByTnJ57 z_yw%?6Znhckifn<+`N5DZtHv6{`8h|cuYM!CR2y@9IpGre;E7SSe`i~H%`leF_hx? zo!NxM1xbN06~z^)Fpwb7^AU0R`~R`jN~-q3NfI_fDocbQjH+~lkbpudq=|~`jLHZk zT(uj}4p=SA79?qga+wa;mo=)AH0&jie$_b08a0kpgKYt9v@>ydBQ}k8)@vwMDdSaS zW#9mm%c>P%d#V13)wnO=GCEvp17(gBJy~-C%xQBD*-i(7k>53IW8SnjpjvgwREQg? zZcqm4*nX0q_B6GM>^U4>%d2WhI{>@PyEkGx0UIgdpGVsT*vQEHdDw2iMz7(Yhg}EQ z$j14**wB8c)Gi!0@Xu4-_PRAgdqcVJkfE=)x()S}dP|YQjU?iZcH$%^yqDMdQnXKp^Fu zE5F``SpjX!6!QUo`wp`yr*4?RlXwZ0pdZ}B=w2IU!S98gT{ro=*xE`p%<*C(c8f;_ zDS9fi@YsY-&GymJQQgT4BeH;$j7cI3l_}eNG7Yk#fN;1 zmv~%pZiSEB)Sbyw9|A^cj{0!kIbOfqVWcqJ=YU|vema;jsZq#=5YT<4xr)~B<*N%< zt}dh)QJ>3b$|Av3MpJHV7ZzP}T)E8Y6M=H1JdaQm+8z3;L*hz!R3pQ$Nc(K+m>v6* z(Ym1(yxFh>wm8sIG8VggEajREOW_#HBUd6!k%bqu09GxwH?i@6jgBf)_9RSRK)1L? zgLe?*Ap}rO5GCm1)qH|3hoY5>CZt)gy`x!+V`~&zqvpg*#8U)EtX?WIT60#GH^pf- zeR1@3DQQkU55=N!Uew4X z;UnP0YixR<1+fVwO9&wed<2_}vdQ+KLiMRspG@@?s56#B``(r>Ten*`>Ai;b+~hxB zQX2ZzhW<_aKI4+z?`N+){1`HbyOU3*bx2``Rc2UbhW8N4d)dZ^t&l-f7{AK+WyX&i z2C~=G)|0Y(0HVU2QkhdSa|*Zkvx~CZ4^d&dRHjR2y1>&=&#Jz6cW*tp_wV$7xn$op z#dl5hUE8#xrlZ;0yYy4L;ybJQ&Td-sOx-uk@#oBOh3Qb44w>n|lO4$|sqYTU?h%Lz zGpaJ9GBa8LA4c;|>x0%Wk8U5`q=A~vExWrErdwsYWu_ZXs4M&K!vNGCXz19wwf(8= z?trK;ohs8QGo84{=^U-Hu(Hz-6=qOn24!aO|J)Kptm>8BL5K>|r!svq)Az#RRUJp+ zp=BG}b9!Z8ae2NG02Sw`>Kv7wqkwdO1Po?d6UOPvPClHGeO(a$t^Qw+$WFi^bZ1U)cKSx6}8hH>ZIO&F?(4f9?Ltox7(r4XI5-o36YA{4aLfnHQe!?BK&O+0zX% z@9E6a4;`|n6Jp*om}^vzosvC+5P!PVmt9iZ0r?ac|E2=plTKTWh&_#p_qS zeps68%f#>gSC|2n8IViyEwb2FSeSgKs}EQFky{q$Fr<&~B1Y<$FNB09`jDO@gL^ zT$!QJUkUv=jclV=%m@%l#`rN|6^h6>5IZ3Sp^;>PqDiVipu|Iv_KXev@&wKhV9(&+ z3!+Ch#uvm1*%9>${b^v^qB%RYU3|A$d5fb)|G5HNiP|nTNb91w7Zq8Fo W=EKh>znIM6U%^aJN1z(B@qYm`Eln8! 
literal 0 HcmV?d00001 diff --git a/augment_indexer/__pycache__/models.cpython-311.pyc b/augment_indexer/__pycache__/models.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e0588a6ac9888ec59fe52985a9b560720dc2697e GIT binary patch literal 3886 zcmbVP&2JmW72hR?U&{|!A91U?vDdN`m4;2haqAjsVmP&uSa!`8wtOfr#Bz5iZoS-9 zXO@vI^gtCkxJ3>;7>G~pp@4%ThyE)|Acw_VN}#9QC|IYQ^4{!{qD)#Xx}1G_^WMzv z&YR!--qY8`qNc$0_ug&$-+4v(4|YaZDpp<}f%27NDK*7Xt%Ret6SahjdeTX@NsY8q zwUq1=C*97}GEz@D*> z<`di9(QSOR&V9qDSM;V0&ox_yyF>XE-Sph1z0+X^e!~^ra4nrt-f?_h(Z0j5lvR<# zH=B;Zc{w36zp;JF3`b;tyNh!S2S?z-tWDeWp|h}QGiv&GJlCgB{YUs#a~xn$MqBp! zXVCddp-N2!I3yke2x>`6Xv#`_2p*{=QL_kyFC|J3>8{6Z-(%0V+@`m`e%yx@!h+j! z5i-0JE3Sx?x93tOh-h7)@-^}6MmU`}$MiMe{+ za9t0U0xRbA`ahM|@LkIHH#S;cn{F7Loi=s-4bDu|cWio<8Lg4IEDaAka*_nFM4Jvs=CEOiR_gCj z>PA>ZP_m8-xb}4F8Vx|C?V5~Y6b)xQrXn9F3INhEltdAL2;geBZU0fr5E>h!qFTJu$*G0Y8jfP z)Yl$>Mufwb241D(Iw%6gfBJ_!@L=r4SN}}7%YACZQ)ix zu=UBUkbE$B?{YwvdaalIMLQs%U+$C3At?{euXOJ|Z}s?>U|xaNCo3U&e{gZFd%ySN zmzQ3=8xR2KTA!?iWHTU}5&rO_X5;T~;5ik4NXR3?KwT0BkO%6i2vbszu_yHyYf_ik z0!-#=X&{{J3A}1CCLy;^`SKEzIUuHj$V*~^i~_kT24&QV6W9MlOEAMXY#<%OaU{Mt z^?hKX2#*mWcPYa}GG-v%YXauuM19OOF(|XfuD$I&p{_`S_Lw;$zhnEYPD2_QRzrf| zERk+7!!=u?Xm%LjH0H0W(Avh+h|4%&+IWgc~T6GKN*ZNQb?93 zrYxT%!=DcH)$YAs_T}P>;sj+@W<&UTK(0qv#jpN9v6`OEl`_$du!>1z3dA~s_p1$qECSyIG=IY#JYIb$@ujvuk-p25e2 ziIv)3i$Y<@bQ}EO;vLhZoHsj8jL%GLDJKO1E5LIi%kFEgcQvj91;M&f6tFhr@~i=m zM!iky0Lw$qY*F*ct_OhIvP3RodPu-kk)n)wOcWWFg#C~j+;hu?Sr`_i;dxG!izTUU zz+OjL`2*A>z5WWdH{g5?Z73|bPlt194CnGdn-3O#@vU|vAlUlkMo3l%3-1I3Tc5lW zl8dKy&kf*kRJ+xKYDm@w=PqT zW>gVXqLMPIaAZ*_3mKV*Bm>B}os8x8*$<Wd`f*NW{^nQPc)}v!rsiZzuhstc(TRxPYqe=gY zlE@DeOX?>uZMHm=_9H0|lQ8^?<#<9x&eheQ50zC7*9GeNaErLY$xh= literal 0 HcmV?d00001 diff --git a/augment_indexer/file_filter.py b/augment_indexer/file_filter.py new file mode 100644 index 0000000..88ab035 --- /dev/null +++ b/augment_indexer/file_filter.py @@ -0,0 +1,123 @@ +""" +File filtering logic for GitHub repository indexing. 
"""
File filtering logic for GitHub repository indexing.
"""

import re
from pathlib import Path
from typing import Optional

# Filenames that typically hold private keys / certificates; these must never
# be shipped to the index regardless of other rules.
KEYISH_PATTERN = re.compile(
    r'^(\.git|.*\.pem|.*\.key|.*\.pfx|.*\.p12|.*\.jks|.*\.keystore|.*\.pkcs12|.*\.crt|.*\.cer|id_rsa|id_ed25519|id_ecdsa|id_dsa)$'
)

# Upload ceiling for a single file.
DEFAULT_MAX_FILE_SIZE = 1024 * 1024  # 1 MB


def always_ignore_path(path: str) -> bool:
    """Return True for paths that must always be skipped (contain ".." traversal)."""
    return ".." in path


def is_keyish_path(path: str) -> bool:
    """Return True when the basename of *path* looks like a secret/key file."""
    return KEYISH_PATTERN.match(Path(path).name) is not None


def is_valid_file_size(size_bytes: int, max_file_size: int = DEFAULT_MAX_FILE_SIZE) -> bool:
    """Return True when *size_bytes* does not exceed *max_file_size* (default 1 MB)."""
    return size_bytes <= max_file_size


def is_valid_utf8(content: bytes) -> bool:
    """Return True when *content* decodes cleanly as UTF-8 (i.e. is not binary)."""
    try:
        content.decode("utf-8")
    except UnicodeDecodeError:
        return False
    return True


def should_filter_file(
    path: str,
    content: bytes,
    max_file_size: Optional[int] = None,
) -> dict:
    """Decide whether a file must be excluded from indexing.

    Checks run in priority order: path traversal, size limit, keyish
    (secret-looking) filenames, then binary detection via UTF-8 decoding.
    .augmentignore and .gitignore rules are applied by the caller at their
    own priorities (before/after the keyish check respectively).

    Args:
        path: Repository-relative file path.
        content: Raw file bytes.
        max_file_size: Size ceiling in bytes; DEFAULT_MAX_FILE_SIZE when None.

    Returns:
        ``{"filtered": True, "reason": ...}`` when the file must be skipped,
        otherwise ``{"filtered": False}``.
    """
    limit = DEFAULT_MAX_FILE_SIZE if max_file_size is None else max_file_size

    if always_ignore_path(path):
        reason = "path_contains_dotdot"
    elif not is_valid_file_size(len(content), limit):
        reason = f"file_too_large ({len(content)} bytes)"
    elif is_keyish_path(path):
        reason = "keyish_pattern"
    elif not is_valid_utf8(content):
        reason = "binary_file"
    else:
        return {"filtered": False}

    return {"filtered": True, "reason": reason}
def __init__(self, token: str) -> None:
    """Create a client.

    Args:
        token: GitHub personal access token or GitHub App token. Kept on the
            instance to authenticate raw HTTP downloads (tarballs).
    """
    self._github = Github(token)
    self._token = token


def resolve_ref(self, owner: str, repo: str, ref: str) -> str:
    """Resolve *ref* (e.g. "HEAD", a branch name, or a SHA) to a full commit SHA.

    Raises:
        Exception: when GitHub cannot resolve the ref.
    """
    try:
        return self._github.get_repo(f"{owner}/{repo}").get_commit(ref).sha
    except GithubException as error:
        raise Exception(
            f'Failed to resolve ref "{ref}" for {owner}/{repo}: {error}'
        ) from error


def download_tarball(self, owner: str, repo: str, ref: str) -> dict[str, str]:
    """Download the repository tarball at *ref* and return ``{path: text}``.

    Files are filtered in priority order: .augmentignore, then the generic
    checks in should_filter_file() (traversal / size / keyish / binary),
    then .gitignore. Filtering statistics are printed at the end.
    """
    print(f"Downloading tarball for {owner}/{repo}@{ref}...")

    archive_url = self._github.get_repo(f"{owner}/{repo}").get_archive_link("tarball", ref)

    # Authenticated raw download so private repositories work; long timeout
    # to accommodate large archives.
    response = requests.get(
        archive_url,
        headers={"Authorization": f"Bearer {self._token}"},
        stream=True,
        timeout=600,
    )
    if not response.ok:
        raise Exception(f"Failed to download tarball: {response.reason}")

    augmentignore, gitignore = self._load_ignore_patterns(owner, repo, ref)

    files: dict[str, str] = {}
    total_files = 0
    filtered_files = 0
    filter_reasons: dict[str, int] = {}

    def record_filtered(reason: str) -> None:
        # Tally one skipped file under *reason*.
        nonlocal filtered_files
        filtered_files += 1
        filter_reasons[reason] = filter_reasons.get(reason, 0) + 1

    with tarfile.open(fileobj=io.BytesIO(response.content), mode="r:gz") as tar:
        for member in tar.getmembers():
            # Only regular files: directories and symlinks are skipped.
            if not member.isfile():
                continue

            total_files += 1

            # Strip the tarball's synthetic root directory ("owner-repo-sha/").
            _, _, file_path = member.name.partition("/")
            if not file_path:
                continue

            handle = tar.extractfile(member)
            if handle is None:
                continue
            raw = handle.read()

            # 1. .augmentignore takes precedence over everything else.
            if augmentignore and augmentignore.match_file(file_path):
                record_filtered("augmentignore")
                continue

            # 2. Traversal / size / keyish / binary checks.
            verdict = should_filter_file(path=file_path, content=raw)
            if verdict["filtered"]:
                record_filtered(verdict.get("reason", "unknown"))
                continue

            # 3. .gitignore is applied last.
            if gitignore and gitignore.match_file(file_path):
                record_filtered("gitignore")
                continue

            try:
                files[file_path] = raw.decode("utf-8")
            except UnicodeDecodeError:
                # Defensive: should_filter_file() already validated UTF-8.
                record_filtered("decode_error")
                print(f"Warning: File {file_path} passed UTF-8 validation but failed to decode")

    print(f"Extracted {len(files)} files from tarball")
    print(f"Filtered {filtered_files} of {total_files} files. Reasons: {filter_reasons}")
    return files
def compare_commits(self, owner: str, repo: str, base: str, head: str) -> dict:
    """Compare two commits and collect the file-level changes.

    Returns:
        Dict with keys: ``files`` (list of FileChange, with contents populated
        for added/modified files when the download succeeds), ``commits``
        (number of commits between base and head), and ``totalChanges``
        (number of changed files).
    """
    print(f"Comparing {base}...{head}...")

    comparison = self._github.get_repo(f"{owner}/{repo}").compare(base, head)

    changes: "list[FileChange]" = []
    for entry in comparison.files:
        change = FileChange(
            path=entry.filename,
            status=self._map_github_status(entry.status),
            previousFilename=entry.previous_filename,
        )

        # Contents are needed for anything that will be (re)indexed.
        # Best effort: a failed download leaves contents unset and warns.
        if change.status in ("added", "modified"):
            try:
                change.contents = self.get_file_contents(owner, repo, entry.filename, head)
            except Exception as error:
                print(f"Warning: Failed to download {entry.filename}: {error}")

        changes.append(change)

    return {
        "files": changes,
        "commits": comparison.total_commits,
        "totalChanges": len(comparison.files),
    }


def get_file_contents(self, owner: str, repo: str, path: str, ref: str) -> str:
    """Return the UTF-8 text of *path* at *ref*.

    Raises:
        Exception: when *path* resolves to a directory rather than a file.
    """
    blob = self._github.get_repo(f"{owner}/{repo}").get_contents(path, ref)
    if isinstance(blob, list):
        raise Exception(f"{path} is not a file")
    return blob.decoded_content.decode("utf-8")


def _load_ignore_patterns(
    self, owner: str, repo: str, ref: str
) -> "tuple[pathspec.PathSpec | None, pathspec.PathSpec | None]":
    """Load .augmentignore and .gitignore as PathSpec objects.

    Returned separately so the caller can apply them at different priorities
    (.augmentignore before the keyish check, .gitignore after). Either entry
    is None when the corresponding file does not exist at *ref*.
    """
    def load(name: str):
        try:
            text = self.get_file_contents(owner, repo, name, ref)
        except Exception:
            return None  # file is absent at this ref
        return pathspec.PathSpec.from_lines("gitwildmatch", text.splitlines())

    return load(".augmentignore"), load(".gitignore")


def _map_github_status(self, status: str) -> str:
    """Normalize a GitHub file status; unrecognized values collapse to "modified"."""
    recognized = ("added", "modified", "removed", "renamed")
    return status if status in recognized else "modified"


def ignore_files_changed(self, owner: str, repo: str, base: str, head: str) -> bool:
    """Return True when .gitignore or .augmentignore changed between the commits."""
    comparison = self._github.get_repo(f"{owner}/{repo}").compare(base, head)
    watched = (".gitignore", ".augmentignore")
    return any(entry.filename in watched for entry in comparison.files)
+ """ + try: + repository = self._github.get_repo(f"{owner}/{repo}") + repository.compare(base, head) + return False + except GithubException: + # If comparison fails, it's likely a force push + return True diff --git a/augment_indexer/index_manager.py b/augment_indexer/index_manager.py new file mode 100644 index 0000000..c2bf48f --- /dev/null +++ b/augment_indexer/index_manager.py @@ -0,0 +1,395 @@ +""" +Index Manager - Core indexing logic +""" + +import json +import tempfile +from pathlib import Path +from typing import Optional + +from auggie_sdk.context import DirectContext, File + +from .github_client import GitHubClient +from .models import FileChange, IndexConfig, IndexResult, IndexState, RepositoryInfo + +DEFAULT_MAX_COMMITS = 100 +DEFAULT_MAX_FILES = 500 + + +class IndexManager: + """Index Manager - Core indexing logic for GitHub repositories.""" + + def __init__( + self, context: DirectContext, config: IndexConfig, state_path: str + ) -> None: + """ + Initialize the IndexManager. + + Args: + context: DirectContext instance for indexing operations. + config: Configuration for the indexing operation. + state_path: Path to the state file for persistence. + """ + self._context = context + self._config = config + self._state_path = state_path + self._github = GitHubClient(config.githubToken) + + def resolve_commit_sha(self) -> None: + """ + Resolve the current commit ref to an actual commit SHA. + + This handles cases where GITHUB_SHA might be "HEAD" or a branch name. + Updates the config.currentCommit with the resolved SHA. + """ + resolved_sha = self._github.resolve_ref( + self._config.owner, self._config.repo, self._config.currentCommit + ) + self._config.currentCommit = resolved_sha + + def _load_state(self) -> Optional[IndexState]: + """ + Load index state from file system. 
def _load_state(self) -> "Optional[IndexState]":
    """Read the persisted index state, or None when no state file exists yet.

    Storage backends: to use Redis / S3 / a database instead of the local
    file system, replace this method with the equivalent read — e.g.
    ``json.loads(redis.get(key))`` or boto3 ``get_object`` — returning the
    parsed JSON (or None when absent).
    """
    try:
        with open(self._state_path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except FileNotFoundError:
        return None


def _save_state(self, state: "IndexState") -> None:
    """Persist the index state as JSON, creating parent directories as needed.

    Storage backends: the state is a plain JSON-serializable object, so this
    can be swapped for a Redis SET, boto3 ``put_object``, or a database
    upsert — useful when multiple workers must share state.
    """
    Path(self._state_path).parent.mkdir(parents=True, exist_ok=True)
    with open(self._state_path, "w", encoding="utf-8") as handle:
        json.dump(state, handle, indent=2)


def index(self) -> "IndexResult":
    """Run the indexing pipeline.

    Loads prior state, decides between a full re-index and an incremental
    update, and returns an IndexResult; any exception is reported via a
    failed result rather than propagating.
    """
    print(
        f"Starting index for {self._config.owner}/{self._config.repo}"
        f"@{self._config.branch}"
    )

    try:
        previous_state = self._load_state()

        # When previous state exists, the incremental path re-imports it
        # into a fresh context.
        should_reindex, reason = self._should_full_reindex(previous_state)
        if should_reindex:
            return self._full_reindex(reason)

        # _should_full_reindex() reports "first_run" for missing state, so
        # previous_state is necessarily present on this branch.
        if not previous_state:
            raise RuntimeError("previous_state should not be None at this point")
        return self._incremental_update(previous_state)
    except Exception as error:  # boundary: surface the failure in the result
        print(f"Indexing failed: {error}")
        return IndexResult(
            success=False,
            type="full",
            filesIndexed=0,
            filesDeleted=0,
            checkpointId="",
            commitSha=self._config.currentCommit,
            error=str(error),
        )
def _should_full_reindex(
    self, previous_state: "Optional[IndexState]"
) -> "tuple[bool, Optional[str]]":
    """Decide between a full re-index and an incremental update.

    Returns:
        ``(True, reason)`` when a full re-index is required;
        ``(False, None)`` when an incremental update (or nothing) suffices.
    """
    # First run: nothing to build on.
    if not previous_state:
        return (True, "first_run")

    # State belongs to a different repository.
    repo_info = previous_state["repository"]
    if repo_info["owner"] != self._config.owner or repo_info["name"] != self._config.repo:
        return (True, "different_repository")

    # Already at this commit: nothing to do.
    if previous_state["lastCommitSha"] == self._config.currentCommit:
        print("No changes detected")
        return (False, None)

    # Rewritten history invalidates the stored base commit.
    if self._github.is_force_push(
        self._config.owner,
        self._config.repo,
        previous_state["lastCommitSha"],
        self._config.currentCommit,
    ):
        return (True, "force_push")

    comparison = self._github.compare_commits(
        self._config.owner,
        self._config.repo,
        previous_state["lastCommitSha"],
        self._config.currentCommit,
    )

    # Past these thresholds, replaying changes costs more than starting over.
    max_commits = self._config.maxCommits or DEFAULT_MAX_COMMITS
    if comparison["commits"] > max_commits:
        return (True, f"too_many_commits ({comparison['commits']} > {max_commits})")

    max_files = self._config.maxFiles or DEFAULT_MAX_FILES
    if comparison["totalChanges"] > max_files:
        return (True, f"too_many_files ({comparison['totalChanges']} > {max_files})")

    # Changed ignore rules can re-include or exclude arbitrary files.
    if self._github.ignore_files_changed(
        self._config.owner,
        self._config.repo,
        previous_state["lastCommitSha"],
        self._config.currentCommit,
    ):
        return (True, "ignore_files_changed")

    return (False, None)


def _full_reindex(self, reason: "Optional[str]") -> "IndexResult":
    """Re-index the whole repository from a tarball snapshot and persist state.

    Args:
        reason: Why the full re-index was triggered (for reporting).
    """
    print(f"Performing full re-index (reason: {reason or 'unknown'})")

    snapshot = self._github.download_tarball(
        self._config.owner, self._config.repo, self._config.currentCommit
    )

    files_to_index = [
        File(path=file_path, contents=text) for file_path, text in snapshot.items()
    ]

    print(f"Adding {len(files_to_index)} files to index...")
    self._context.add_to_index(files_to_index)

    # Capture the exported context so it can be re-imported next run.
    context_state = self._context.export()

    new_state = {
        "contextState": context_state.to_dict(),
        "lastCommitSha": self._config.currentCommit,
        "repository": RepositoryInfo(
            owner=self._config.owner,
            name=self._config.repo,
        ),
    }
    # NOTE(review): json-serializing the state assumes RepositoryInfo is a
    # plain dict/TypedDict — confirm against models.py.
    self._save_state(new_state)

    return IndexResult(
        success=True,
        type="full",
        filesIndexed=len(files_to_index),
        filesDeleted=0,
        checkpointId=context_state.checkpoint_id or "",
        commitSha=self._config.currentCommit,
        reindexReason=reason,
    )
def _incremental_update(self, previous_state: "IndexState") -> "IndexResult":
    """Apply only the file changes made since the last indexed commit.

    Re-hydrates the DirectContext from the previously exported state,
    applies adds/deletes from the commit comparison, and persists the
    updated state.
    """
    print("Performing incremental update...")

    # The SDK imports state from a file path, so round-trip the prior state
    # through a temp file. delete=False because Windows cannot reopen a
    # NamedTemporaryFile while it is open — we close it first and unlink
    # it ourselves.
    temp_file = tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", prefix="github-indexer-incremental-", delete=False
    )
    temp_path = Path(temp_file.name)
    try:
        json.dump(previous_state["contextState"], temp_file, indent=2)
        temp_file.close()

        self._context = DirectContext.import_from_file(
            str(temp_path),
            api_key=self._config.apiToken,
            api_url=self._config.apiUrl,
        )
    finally:
        temp_path.unlink(missing_ok=True)

    comparison = self._github.compare_commits(
        self._config.owner,
        self._config.repo,
        previous_state["lastCommitSha"],
        self._config.currentCommit,
    )

    files_to_add, files_to_delete = self._process_file_changes(comparison["files"])
    print(f"Adding {len(files_to_add)} files, deleting {len(files_to_delete)} files")

    if files_to_add:
        self._context.add_to_index(files_to_add)
    if files_to_delete:
        self._context.remove_from_index(files_to_delete)

    context_state = self._context.export()
    new_state = {
        "contextState": context_state.to_dict(),
        "lastCommitSha": self._config.currentCommit,
        "repository": previous_state["repository"],
    }
    self._save_state(new_state)

    return IndexResult(
        success=True,
        type="incremental",
        filesIndexed=len(files_to_add),
        filesDeleted=len(files_to_delete),
        checkpointId=context_state.checkpoint_id or "",
        commitSha=self._config.currentCommit,
    )


def _process_file_changes(
    self, changes: "list[FileChange]"
) -> "tuple[list[File], list[str]]":
    """Split changes into (files_to_add, paths_to_delete).

    Renames delete the old path and re-add the new one. Added/modified
    entries without downloaded contents are skipped silently — the download
    step already emitted a warning for them.
    """
    files_to_add: "list[File]" = []
    files_to_delete: "list[str]" = []

    for change in changes:
        if change.status in ("added", "modified"):
            if change.contents:
                files_to_add.append(File(path=change.path, contents=change.contents))
        elif change.status == "removed":
            files_to_delete.append(change.path)
        elif change.status == "renamed":
            if change.previousFilename:
                files_to_delete.append(change.previousFilename)
            if change.contents:
                files_to_add.append(File(path=change.path, contents=change.contents))

    return files_to_add, files_to_delete


def get_api_credentials() -> tuple[str, str]:
    """Read (AUGMENT_API_TOKEN, AUGMENT_API_URL) from the environment.

    Raises:
        ValueError: when either variable is missing or empty.
    """
    api_token = os.environ.get("AUGMENT_API_TOKEN")
    if not api_token:
        raise ValueError("AUGMENT_API_TOKEN environment variable is required")

    api_url = os.environ.get("AUGMENT_API_URL")
    if not api_url:
        raise ValueError(
            "AUGMENT_API_URL environment variable is required. Please set it to your "
            "tenant-specific URL (e.g., 'https://your-tenant.api.augmentcode.com/')"
        )

    return api_token, api_url
def parse_repository_info() -> tuple[str, str, str, str]:
    """Read repository information from GitHub Actions environment variables.

    Returns:
        (owner, repo, branch, current_commit). Tag builds get a "tag/"
        branch prefix so their index state is kept separate from branches.

    Raises:
        ValueError: when GITHUB_REPOSITORY is malformed or GITHUB_SHA is missing.
    """
    repository = os.environ.get("GITHUB_REPOSITORY", "")
    parts = repository.split("/")

    if len(parts) != 2 or not parts[0] or not parts[1]:
        raise ValueError('GITHUB_REPOSITORY must be in format "owner/repo"')

    owner, repo = parts

    github_ref = os.environ.get("GITHUB_REF", "")
    github_ref_name = os.environ.get("GITHUB_REF_NAME", "")

    if github_ref.startswith("refs/heads/"):
        branch = github_ref_name
    elif github_ref.startswith("refs/tags/"):
        branch = f"tag/{github_ref_name}"
    elif github_ref_name:
        branch = github_ref_name
    else:
        # Fall back for non-Actions invocations (e.g. local runs).
        branch = os.environ.get("BRANCH", "main")

    current_commit = os.environ.get("GITHUB_SHA", "")
    if not current_commit:
        raise ValueError("GITHUB_SHA environment variable is required")

    return owner, repo, branch, current_commit


def load_config() -> "IndexConfig":
    """Assemble the IndexConfig from environment variables.

    Raises:
        ValueError: when a required variable is missing, or when MAX_COMMITS /
            MAX_FILES is set but not an integer.
    """
    github_token = os.environ.get("GITHUB_TOKEN")
    if not github_token:
        raise ValueError("GITHUB_TOKEN environment variable is required")

    api_token, api_url = get_api_credentials()
    owner, repo, branch, current_commit = parse_repository_info()

    max_commits = os.environ.get("MAX_COMMITS")
    max_files = os.environ.get("MAX_FILES")

    return IndexConfig(
        apiToken=api_token,
        apiUrl=api_url,
        githubToken=github_token,
        owner=owner,
        repo=repo,
        branch=branch,
        currentCommit=current_commit,
        maxCommits=int(max_commits) if max_commits else None,
        maxFiles=int(max_files) if max_files else None,
    )


def get_state_path(branch: str) -> str:
    """Return the state-file path for *branch*.

    The branch name is sanitized for filesystem use; the STATE_PATH
    environment variable overrides the default location entirely.
    """
    sanitized_branch = re.sub(r"[^a-zA-Z0-9\-_]", "-", branch)
    return os.environ.get(
        "STATE_PATH", f".augment-index-state/{sanitized_branch}/state.json"
    )


def _write_github_outputs(result) -> None:
    """Append this run's step outputs to $GITHUB_OUTPUT (no-op outside Actions)."""
    github_output = os.environ.get("GITHUB_OUTPUT")
    if not github_output:
        return
    output_lines = [
        f"success={result.success}",
        f"type={result.type}",
        f"files_indexed={result.filesIndexed}",
        f"files_deleted={result.filesDeleted}",
        f"checkpoint_id={result.checkpointId}",
        f"commit_sha={result.commitSha}",
    ]
    with open(github_output, "a") as f:
        f.write("\n".join(output_lines) + "\n")


def main() -> None:
    """Entry point: configure, index, report results, and set step outputs.

    Fix: step outputs are written to $GITHUB_OUTPUT BEFORE the failure exit.
    The workflow prints the outputs in an `if: always()` step, so previously
    a failed run exited with status 1 without ever writing them, leaving
    that step with empty values.
    """
    print("GitHub Action Indexer - Starting...")

    try:
        config = load_config()
        state_path = get_state_path(config.branch)

        print(f"Repository: {config.owner}/{config.repo}")
        print(f"Branch: {config.branch}")
        print(f"Commit ref: {config.currentCommit}")
        print(f"State path: {state_path}")

        # Create DirectContext and resolve the (possibly symbolic) commit ref.
        context = DirectContext.create(api_key=config.apiToken, api_url=config.apiUrl)
        manager = IndexManager(context, config, state_path)
        manager.resolve_commit_sha()

        print(f"Resolved commit SHA: {config.currentCommit}")

        result = manager.index()

        print("\n=== Indexing Results ===")
        print(f"Success: {result.success}")
        print(f"Type: {result.type}")
        print(f"Files Indexed: {result.filesIndexed}")
        print(f"Files Deleted: {result.filesDeleted}")
        print(f"Checkpoint ID: {result.checkpointId}")
        print(f"Commit SHA: {result.commitSha}")

        if result.reindexReason:
            print(f"Re-index Reason: {result.reindexReason}")

        # Write outputs before any failure exit so the workflow's
        # always()-guarded "Print results" step can read them.
        _write_github_outputs(result)

        if result.error:
            print(f"Error: {result.error}", file=sys.stderr)
            sys.exit(1)

        print("\nIndexing completed successfully!")

    except Exception as error:
        print(f"Fatal error: {error}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
@dataclass
class FileChange:
    """
    Represents a file change detected between commits.

    Used to track what files need to be indexed or removed from the index.
    A "renamed" change carries both the new ``path`` and the
    ``previousFilename``; consumers delete the old path and index the new
    one. ``contents`` is expected to be populated only for changes that add
    or modify a file.
    """

    path: str
    """File path"""

    status: Literal["added", "modified", "removed", "renamed"]
    """Change status: added, modified, removed, renamed"""

    previousFilename: Optional[str] = None
    """Previous filename (for renames)"""

    contents: Optional[str] = None
    """File contents (for added/modified files)"""

    oldBlobName: Optional[str] = None
    """Blob name from previous index (for modified/removed files)"""
+ """ + + apiToken: str + """Augment API token""" + + apiUrl: str + """Augment API URL (provided via AUGMENT_API_URL env var)""" + + githubToken: str + """GitHub token""" + + owner: str + """Repository owner""" + + repo: str + """Repository name""" + + branch: str + """Branch to index""" + + currentCommit: str + """Current commit SHA""" + + maxCommits: Optional[int] = None + """Maximum commits before full re-index""" + + maxFiles: Optional[int] = None + """Maximum file changes before full re-index""" + + +@dataclass +class IndexResult: + """ + Result from an indexing operation. + + Contains information about what was indexed and whether it was successful. + """ + + success: bool + """Whether indexing was successful""" + + type: Literal["full", "incremental", "no-changes"] + """Type of indexing performed""" + + filesIndexed: int + """Number of files indexed""" + + filesDeleted: int + """Number of files deleted""" + + checkpointId: str + """New checkpoint ID""" + + commitSha: str + """Commit SHA that was indexed""" + + error: Optional[str] = None + """Error message if failed""" + + reindexReason: Optional[str] = None + """Reason for full re-index (if applicable)""" + diff --git a/augment_indexer/requirements.txt b/augment_indexer/requirements.txt new file mode 100644 index 0000000..5552b4e --- /dev/null +++ b/augment_indexer/requirements.txt @@ -0,0 +1,14 @@ +# GitHub Action Indexer dependencies + +# Augment SDK for indexing and search +auggie-sdk>=0.1.0 + +# GitHub API client +PyGithub>=2.1.0 + +# HTTP requests (for tarball download) +requests>=2.25.0 + +# Gitignore-style pattern matching +pathspec>=0.11.0 + diff --git a/augment_indexer/search.py b/augment_indexer/search.py new file mode 100644 index 0000000..fdac426 --- /dev/null +++ b/augment_indexer/search.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +""" +CLI tool to search the indexed repository + +Usage: + cd examples/python-sdk/context + python -m github_action_indexer search "your search query" + python -m 
def load_state(state_path: str) -> "Optional[IndexState]":
    """Load index state from the file system.

    Args:
        state_path: Path to the JSON state file for the current branch.

    Returns:
        The parsed state dict, or None if the file does not exist.
    """
    try:
        # JSON is UTF-8 by spec; be explicit so Windows does not fall back
        # to a locale encoding such as cp1252 when reading the state file.
        with open(state_path, "r", encoding="utf-8") as f:
            # NOTE(review): a corrupt state file raises JSONDecodeError here
            # rather than returning None — confirm that is intended.
            return json.load(f)
    except FileNotFoundError:
        return None
Please set it to your " + "tenant-specific URL (e.g., 'https://your-tenant.api.augmentcode.com/')", + file=sys.stderr, + ) + sys.exit(1) + + print(f'Searching for: "{args.query}"') + if args.max_chars is not None: + print(f"Limiting results to max {args.max_chars} characters\n") + else: + print() + + try: + # Load the index state first + state_path = get_state_path() + print(f"Loading index state from: {state_path}") + state = load_state(state_path) + + if not state: + print("Error: No index state found. Run indexing first.", file=sys.stderr) + print(" python -m github_action_indexer index", file=sys.stderr) + sys.exit(1) + + # Create a temporary file with the context state for import + # Use delete=False because Windows can't reopen a NamedTemporaryFile while it's open + temp_file = tempfile.NamedTemporaryFile( + mode="w", suffix=".json", prefix="github-indexer-state-", delete=False + ) + temp_path = Path(temp_file.name) + try: + json.dump(state["contextState"], temp_file, indent=2) + temp_file.close() # Close before reading on Windows + + # Import state using DirectContext.import_from_file + context = DirectContext.import_from_file( + str(temp_path), api_key=api_token, api_url=api_url + ) + finally: + temp_path.unlink(missing_ok=True) + + file_count = len(state["contextState"].get("blobs", [])) + + print(f"Loaded index: {file_count} files indexed") + print(f"Repository: {state['repository']['owner']}/{state['repository']['name']}") + print(f"Last indexed commit: {state['lastCommitSha']}\n") + + # Perform search with optional character limit + results = context.search(args.query, max_output_length=args.max_chars) + + if not results or results.strip() == "": + print("No results found.") + return + + print("Search results:\n") + print(results) + + except Exception as error: + print(f"Search failed: {error}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() +