Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,29 @@
# 2. Fill in your API key below
# 3. Never commit your .env file — keep it local
#
# Get your Google API key at: https://aistudio.google.com/apikey
# (Free tier available — Gemini 3 Flash is very cheap for file processing)
# ─────────────────────────────────────────────────────────────
# Gemini Authentication — Choose ONE method:
# ─────────────────────────────────────────────────────────────

# METHOD 1: API Key (recommended for individuals)
# Get your free key at: https://aistudio.google.com/apikey
GOOGLE_API_KEY=your_google_api_key_here

# METHOD 2: Vertex AI (for Google Cloud users)
# Use this if you want to use your GCP project for billing/quota management
# 1. Enable Vertex AI in your Google Cloud project
# 2. Set GOOGLE_GENAI_USE_VERTEXAI=true below
# 3. Fill in your project ID and location
# 4. Authenticate: gcloud auth application-default login
GOOGLE_GENAI_USE_VERTEXAI=false
GOOGLE_CLOUD_PROJECT=your_google_cloud_project_here
GOOGLE_CLOUD_LOCATION=us-central1

# Model Selection (applies to both authentication methods):
# - gemini-3-flash-preview (fast, cheap, recommended)
# - gemini-3-pro-preview (slower, higher quality)
# - gemini-2.0-flash-exp (experimental, Vertex AI)
MODEL=gemini-3-flash-preview

# How to check which method is active:
# Run: python scripts/gemini_auth.py
65 changes: 58 additions & 7 deletions scripts/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,45 @@ If you want a fast overview of what's in a folder before deciding what to keep,

## Prerequisites

**Step 1: Install dependencies**

```bash
# 1. Get a free Google API key
# https://aistudio.google.com/apikey
# The setup scripts do this automatically, or run manually:
pip install google-genai python-dotenv pdfplumber python-pptx python-docx openpyxl
```

# 2. Add it to your vault's .env file
echo "GOOGLE_API_KEY=your_key_here" > .env
**Step 2: Choose your authentication method**

# 3. Install dependencies (setup.sh / setup.ps1 does this automatically)
pip install google-genai python-dotenv pdfplumber python-pptx python-docx openpyxl
### Authentication: Choose Your Method

Both scripts support two authentication methods. Choose the one that fits your needs:

**Option 1: Google AI Studio API Key** *(Recommended for individuals)*

```bash
# 1. Get a free key at: https://aistudio.google.com/apikey
# 2. Add to your .env file:
echo "GOOGLE_API_KEY=your_key_here" >> .env
```

**Option 2: Vertex AI** *(For Google Cloud users)*

```bash
# 1. Enable Vertex AI in your Google Cloud project
# 2. Add to your .env file:
cat >> .env << 'EOF'
GOOGLE_GENAI_USE_VERTEXAI=true
GOOGLE_CLOUD_PROJECT=your-project-id
GOOGLE_CLOUD_LOCATION=us-central1
EOF
# 3. Authenticate with Google Cloud:
gcloud auth application-default login
```

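**How it works:** The scripts auto-detect which method to use based on your `.env` settings. You can verify your setup with the command below; note that `gemini_auth.py` reads environment variables only and does not load `.env` on its own, so export the variables in your shell first if you run it directly: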

```bash
python scripts/gemini_auth.py
```

---
Expand Down Expand Up @@ -168,7 +198,28 @@ process_files_with_gemini.py → outputs/file_summaries/YYYY-MM-DD/

## Troubleshooting

**`GOOGLE_API_KEY not set`** — check your `.env` file is in the vault root and contains `GOOGLE_API_KEY=your_key`
### Authentication Issues

**`GOOGLE_API_KEY not set`** or **`GOOGLE_CLOUD_PROJECT is not set`**

Check which authentication method you're using:
```bash
python scripts/gemini_auth.py
```

For **API Key method**:
- Verify your `.env` file is in the vault root
- Check it contains `GOOGLE_API_KEY=your_key` (no spaces around `=`)
- Verify the key is valid at https://aistudio.google.com/apikey

For **Vertex AI method**:
- Verify `.env` contains `GOOGLE_GENAI_USE_VERTEXAI=true`
- Check `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION` are set
- Ensure you've run: `gcloud auth application-default login`
- Verify Vertex AI API is enabled in your GCP project
- Check your GCP account has permission to use Vertex AI

### Other Issues

**`No module named 'google'`** — run `pip install google-genai`

Expand Down
148 changes: 148 additions & 0 deletions scripts/gemini_auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
#!/usr/bin/env python3
"""
Centralized Gemini authentication for all scripts.

Supports two authentication methods:
1. API Key (via GOOGLE_API_KEY) — for Google AI Studio
2. Vertex AI (via GOOGLE_GENAI_USE_VERTEXAI) — for Google Cloud Platform

The google-genai library auto-detects which method to use based on environment variables.
This module validates that required variables are set and provides clear error messages.
"""

import os
import sys
from typing import Literal

try:
from google import genai
except ImportError:
print("Error: google-genai library not installed")
print("Install with: pip install google-genai")
sys.exit(1)


def get_auth_method() -> Literal["vertex_ai", "api_key"]:
    """
    Report which authentication method the environment selects.

    The decision is based solely on GOOGLE_GENAI_USE_VERTEXAI, compared
    case-insensitively.

    Returns:
        "vertex_ai" when the variable is "true", "1" or "yes";
        "api_key" in every other case, including when it is unset.
    """
    flag = os.environ.get("GOOGLE_GENAI_USE_VERTEXAI", "")
    if flag.lower() in {"true", "1", "yes"}:
        return "vertex_ai"
    return "api_key"


def _validate_api_key_auth() -> None:
    """
    Validate that API key authentication is properly configured.

    Looks for a key in GOOGLE_API_KEY first, then GEMINI_API_KEY. A value that
    is empty or whitespace-only counts as missing, so a blank GOOGLE_API_KEY
    does not hide a usable GEMINI_API_KEY. The template placeholder
    "your_google_api_key_here" is rejected as well, because it can never
    authenticate and would otherwise only fail later at request time.

    Returns:
        None when a plausible key is present.

    Raises:
        SystemExit: with status 1, after printing setup instructions, when no
            usable key is configured.
    """
    placeholder = "your_google_api_key_here"
    candidates = (
        os.environ.get(name, "").strip()
        for name in ("GOOGLE_API_KEY", "GEMINI_API_KEY")
    )
    # First non-blank value wins; GOOGLE_API_KEY is checked before GEMINI_API_KEY.
    api_key = next((value for value in candidates if value), "")

    if api_key and api_key != placeholder:
        return

    if api_key:
        print("Error: GOOGLE_API_KEY still contains the placeholder value")
    else:
        print("Error: GOOGLE_API_KEY not set in environment")
    print()
    print("To use API Key authentication:")
    print("1. Get a free key at: https://aistudio.google.com/apikey")
    print("2. Add to your .env file:")
    print("   GOOGLE_API_KEY=your_key_here")
    print()
    print("Alternatively, use Vertex AI authentication (see README.md)")
    sys.exit(1)


def _validate_vertex_auth() -> None:
    """
    Validate that Vertex AI authentication is properly configured.

    Requires both GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION. Values that
    are empty or whitespace-only count as missing, and the template
    placeholder "your_google_cloud_project_here" is rejected because it is not
    a real project ID. Every problem found is reported, not just the first.

    Returns:
        None when both settings look usable.

    Raises:
        SystemExit: with status 1, after printing the problems and setup
            instructions, when the configuration is incomplete.
    """
    placeholder_project = "your_google_cloud_project_here"
    project = os.environ.get("GOOGLE_CLOUD_PROJECT", "").strip()
    location = os.environ.get("GOOGLE_CLOUD_LOCATION", "").strip()

    errors = []
    if not project:
        errors.append("GOOGLE_CLOUD_PROJECT is not set")
    elif project == placeholder_project:
        errors.append("GOOGLE_CLOUD_PROJECT still contains the placeholder value")
    if not location:
        errors.append("GOOGLE_CLOUD_LOCATION is not set")

    if not errors:
        return

    print("Error: Vertex AI authentication is enabled but incomplete")
    print()
    for error in errors:
        print(f"  ✗ {error}")
    print()
    print("To use Vertex AI authentication:")
    print("1. Set up a Google Cloud project with Vertex AI enabled")
    print("2. Add to your .env file:")
    print("   GOOGLE_GENAI_USE_VERTEXAI=true")
    print("   GOOGLE_CLOUD_PROJECT=your-project-id")
    print("   GOOGLE_CLOUD_LOCATION=us-central1")
    print("3. Authenticate with: gcloud auth application-default login")
    print()
    print("Alternatively, use API Key authentication (see README.md)")
    sys.exit(1)


def get_gemini_client() -> genai.Client:
    """
    Get a configured Gemini client for the detected authentication method.

    The method is chosen by get_auth_method():
    - "vertex_ai" → client built with vertexai=True plus the project and
      location from GOOGLE_CLOUD_PROJECT / GOOGLE_CLOUD_LOCATION
    - "api_key"   → client built with the key from GOOGLE_API_KEY, falling
      back to GEMINI_API_KEY

    The settings are passed to the client explicitly instead of relying on the
    library's own environment detection, so the client always uses the same
    method that was just validated.

    Returns:
        genai.Client: Configured client ready to use

    Raises:
        SystemExit: If authentication is not properly configured, or if the
            client cannot be constructed
    """
    method = get_auth_method()

    # Validate that required environment variables are set for chosen method
    if method == "vertex_ai":
        _validate_vertex_auth()
    else:
        _validate_api_key_auth()

    # NOTE(review): the library's environment detection may not recognise
    # every value get_auth_method() accepts (e.g. "yes") — passing the
    # arguments explicitly avoids depending on that; confirm against the SDK.
    try:
        if method == "vertex_ai":
            return genai.Client(
                vertexai=True,
                project=os.environ.get("GOOGLE_CLOUD_PROJECT"),
                location=os.environ.get("GOOGLE_CLOUD_LOCATION"),
            )
        api_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
        return genai.Client(api_key=api_key)
    except Exception as e:
        # Top-level boundary for a CLI helper: report the failure and exit
        # rather than surfacing a traceback to the user.
        print("Error: Failed to create Gemini client")
        print(f"Details: {e}")
        print()
        if method == "vertex_ai":
            print("If you see authentication errors, try:")
            print("  gcloud auth application-default login")
        else:
            print("Check that your GOOGLE_API_KEY is valid")
        sys.exit(1)


def _mask_api_key(api_key: str) -> str:
    """
    Return a display-safe form of *api_key* for diagnostic output.

    Keys of 12 or more characters keep only their first and last four
    characters; shorter values are replaced entirely by asterisks so that a
    short secret is never echoed in full.
    """
    if len(api_key) < 12:
        return "*" * len(api_key)
    return f"{api_key[:4]}...{api_key[-4:]}"


if __name__ == "__main__":
    # Quick test of authentication setup.
    # NOTE(review): this module only reads os.environ and never loads a .env
    # file itself, so when run directly the variables must already be
    # exported — confirm whether .env should be loaded here.
    print("Testing Gemini authentication...")
    print()

    method = get_auth_method()
    print(f"Detected method: {method}")
    print()

    if method == "vertex_ai":
        print("Vertex AI configuration:")
        print(f"  Project: {os.environ.get('GOOGLE_CLOUD_PROJECT', '(not set)')}")
        print(f"  Location: {os.environ.get('GOOGLE_CLOUD_LOCATION', '(not set)')}")
    else:
        print("API Key configuration:")
        # Same lookup order as the validator, so the report matches what the
        # client will actually use.
        api_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY") or ""
        if api_key:
            print(f"  Key: {_mask_api_key(api_key)}")
        else:
            print("  Key: (not set)")

    print()
    print("Attempting to create client...")

    # get_gemini_client() exits the process itself on failure, so reaching
    # the lines below means the client was constructed.
    client = get_gemini_client()
    print("✓ Success! Client created successfully.")
    print()
    print(f"You can now use the Gemini scripts with {method} authentication.")
54 changes: 40 additions & 14 deletions scripts/process_docs_to_obsidian.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,14 @@
clean compressed Markdown notes ready for Obsidian ingestion.

Usage:
python process_docs_to_obsidian.py <input_folder> <output_folder>
python process_docs_to_obsidian.py <input_folder> <output_folder> [--recursive]

Example:
python process_docs_to_obsidian.py ~/Documents/company_files ~/vault/inbox
python process_docs_to_obsidian.py ~/Documents/company_files ~/vault/inbox --recursive

Options:
--recursive, -r Search for files recursively in subdirectories

Supported file types: PDF, PPTX, DOCX, TXT, MD
"""
Expand All @@ -29,13 +33,25 @@
print("Install with: pip install google-genai python-dotenv")
sys.exit(1)

# Import centralized auth module
try:
from gemini_auth import get_gemini_client
except ImportError:
# If running from different directory, try to import from scripts/
sys.path.insert(0, str(Path(__file__).parent))
from gemini_auth import get_gemini_client

# ─────────────────────────────────────────────
# CONFIG — edit these to customise behaviour
# ─────────────────────────────────────────────
#
# MODEL: which Gemini model to use for synthesis
# "gemini-3-flash-preview" — fast, cheap, great for most files ← default
# "gemini-3-pro-preview" — slower, higher quality for dense/complex docs
# Default: reads from MODEL environment variable (.env file)
# Fallback: "gemini-3-flash-preview" if MODEL not set
# Options:
# "gemini-3-flash-preview" — fast, cheap, great for most files
# "gemini-3-pro-preview" — slower, higher quality for dense/complex docs
# "gemini-2.0-flash-exp" — (Vertex AI) experimental 2.0 model
#
# SUPPORTED: file extensions to process — add or remove as needed
#
Expand All @@ -46,8 +62,7 @@
# and it will rewrite it for you.
# ─────────────────────────────────────────────

MODEL = "gemini-3-flash-preview"
API_KEY = os.environ.get("GOOGLE_API_KEY")
MODEL = os.environ.get("MODEL", "gemini-3-flash-preview")
TODAY = date.today().isoformat()

SUPPORTED = {".pdf", ".pptx", ".ppt", ".docx", ".doc", ".txt", ".md"}
Expand Down Expand Up @@ -185,20 +200,22 @@ def process_file(file_path: Path, client: genai.Client) -> str | None:
# BATCH RUNNER
# ─────────────────────────────────────────────

def process_folder(input_folder: str, output_folder: str):
if not API_KEY:
print("Error: GOOGLE_API_KEY not set in .env")
sys.exit(1)

client = genai.Client(api_key=API_KEY)
def process_folder(input_folder: str, output_folder: str, recursive: bool = False):
# Get authenticated client (auto-detects API Key or Vertex AI)
client = get_gemini_client()
input_path = Path(input_folder).expanduser()
output_path = Path(output_folder).expanduser()
output_path.mkdir(parents=True, exist_ok=True)

files = [f for f in input_path.iterdir() if f.suffix.lower() in SUPPORTED]
# Scan for files (recursively or top-level only)
if recursive:
files = [f for f in input_path.rglob('*') if f.is_file() and f.suffix.lower() in SUPPORTED]
else:
files = [f for f in input_path.iterdir() if f.is_file() and f.suffix.lower() in SUPPORTED]

if not files:
print(f"No supported files found in {input_path}")
search_type = "recursively" if recursive else "in"
print(f"No supported files found {search_type} {input_path}")
print(f"Supported types: {', '.join(SUPPORTED)}")
return

Expand Down Expand Up @@ -232,7 +249,16 @@ def process_folder(input_folder: str, output_folder: str):


if __name__ == "__main__":
    # Parse command-line arguments. Flags are separated from the folder
    # arguments first, so "--recursive" / "-r" may appear in any position
    # without being mistaken for a folder path.
    recursive_flags = {"--recursive", "-r"}
    cli_args = sys.argv[1:]
    recursive = any(arg in recursive_flags for arg in cli_args)
    folders = [arg for arg in cli_args if arg not in recursive_flags]

    # Both the input and output folder are required; show the usage text
    # from the module docstring otherwise.
    if len(folders) < 2:
        print(__doc__)
        sys.exit(1)

    input_folder, output_folder = folders[0], folders[1]

    if recursive:
        print("🔄 Recursive mode enabled: scanning subdirectories")

    # Run the batch exactly once.
    process_folder(input_folder, output_folder, recursive)
Loading