Skip to content

Commit cac8f47

Browse files
committed
fix: startup blockers — migrations, async checkpointer, Pydantic state compat
- DB migrations: make fault-tolerant (each step catches errors independently instead of aborting entire migration chain). Fixes pgvector missing on host PG16, SAVEPOINT-in-autocommit errors, and legacy schema mismatches. - Checkpointer: switch from sync PostgresSaver to AsyncPostgresSaver with async connection pool. Graphs use ainvoke() which requires async checkpoint methods. Setup runs via separate autocommit connection (CREATE INDEX CONCURRENTLY requires it). - State models: add _DictCompatMixin with .get() and __getitem__ to ResearchState, TradingState, SupervisorState. Node code uses dict-style access but states are Pydantic BaseModel instances. - LLM config consolidation and .env.example updates.
1 parent 100ef09 commit cac8f47

File tree

18 files changed

+242
-218
lines changed

18 files changed

+242
-218
lines changed

.env.example

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,13 @@ DATA_PROVIDER_PRIORITY=alpaca,polygon,financial_datasets,alpha_vantage
5555
# azure | groq | together_ai | fireworks_ai | mistral |
5656
# ollama | custom_openai | bedrock_groq
5757
#
58-
# bedrock_groq (recommended): Sonnet for heavy-tier agents (trade_debater,
59-
# fund_manager, quant_researcher, etc.), Groq Llama for all operational
60-
# agents (monitoring, scanning, execution, health checks). Cuts cost on
61-
# 14 of 21 agents while keeping reasoning quality where it matters.
62-
LLM_PROVIDER=bedrock
58+
# groq: Qwen3-32B for all operational tiers (medium/light/bulk).
59+
# bedrock_groq: OpenAI gpt-oss-120b for heavy reasoning, Groq Qwen3-32B for all others.
60+
# No AWS Bedrock used — Sonnet is replaced by gpt-oss-120b for heavy tasks.
61+
LLM_PROVIDER=groq
6362

6463
# Fallback chain — comma-separated, tried left to right if primary is unavailable.
65-
# Example: if Bedrock creds expire mid-day, agents will automatically fall back.
66-
LLM_FALLBACK_CHAIN=anthropic,openai
64+
LLM_FALLBACK_CHAIN=openai,groq
6765

6866
# =============================================================================
6967
# Provider credentials
@@ -100,12 +98,11 @@ OPENAI_MODEL=gpt-4o
10098
# AZURE_API_VERSION=2024-02-15-preview
10199
# AZURE_DEPLOYMENT_NAME=gpt-4o
102100

103-
# --- Groq (Tier 2 — extremely fast inference, free tier available) ---
101+
# --- Groq (primary provider) ---
104102
# Used standalone (LLM_PROVIDER=groq) or in hybrid mode (LLM_PROVIDER=bedrock_groq).
105-
# Hybrid mode: heavy tier stays on Bedrock Sonnet, medium/light/bulk use Groq Llama.
106-
# Models: llama-3.3-70b-versatile (medium), llama-3.1-8b-instant (light/bulk).
103+
# Hybrid mode: heavy tier uses OpenAI gpt-oss-120b, all others use Groq Qwen3-32B.
107104
GROQ_API_KEY=
108-
# GROQ_MODEL=llama-3.3-70b-versatile
105+
GROQ_MODEL=qwen/qwen3-32b
109106

110107
# --- Together AI (Tier 2) ---
111108
# TOGETHER_API_KEY=
@@ -143,7 +140,7 @@ GROQ_API_KEY=
143140

144141
# ICs — narrow focused work, cheaper/faster models are sufficient
145142
# LLM_MODEL_IC=bedrock/us.anthropic.claude-haiku-4-5-20251001-v1:0
146-
# LLM_MODEL_IC=groq/llama-3.3-70b-versatile # budget option (~$0.005/run)
143+
# LLM_MODEL_IC=groq/qwen/qwen3-32b # Groq Qwen3-32B
147144
# LLM_MODEL_IC=gemini/gemini-2.5-flash # balanced option
148145

149146
# Pod Managers — synthesis, benefits from stronger models

config/litellm_router.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# LiteLLM Router Configuration for QuantStack
2-
# Routes by task cost/quality: Claude for reasoning, Groq for bulk/cheap, Ollama for offline.
2+
# Routes by task cost/quality: OpenAI gpt-oss-120b for reasoning, Groq Qwen3-32B for bulk/fast.
33
#
44
# Loaded by: src/quantstack/llm_router.py via LITELLM_ROUTER_CONFIG env var.
55
# Usage:
@@ -9,17 +9,17 @@
99
model_list:
1010
- model_name: "reasoning" # complex decisions: trade debate, fund-manager, hypothesis gen
1111
litellm_params:
12-
model: claude-sonnet-4-6
13-
api_key: os.environ/ANTHROPIC_API_KEY
12+
model: openai/gpt-oss-120b
13+
api_key: os.environ/OPENAI_API_KEY
1414

1515
- model_name: "fast" # bulk/cheap tasks: market-intel, screener, data validation
1616
litellm_params:
17-
model: groq/llama-3.3-70b-versatile
17+
model: groq/qwen/qwen3-32b
1818
api_key: os.environ/GROQ_API_KEY
1919

2020
- model_name: "bulk" # alias for fast — high-volume deterministic tasks
2121
litellm_params:
22-
model: groq/llama-3.3-70b-versatile
22+
model: groq/qwen/qwen3-32b
2323
api_key: os.environ/GROQ_API_KEY
2424

2525
- model_name: "local" # offline fallback — requires Ollama running locally

litellm_config.yaml

Lines changed: 16 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,52 @@
11
# LiteLLM Proxy Configuration
22
# Model groups match tier names used by get_chat_model() — no translation needed.
3+
# Provider strategy: Groq Qwen3-32B for all operational tiers,
4+
# OpenAI gpt-oss-120b for heavy reasoning. No AWS Bedrock.
35

46
model_list:
5-
# --- heavy: Sonnet-class, complex reasoning ---
7+
# --- heavy: complex reasoning (fund-manager, trade-debater, quant-researcher) ---
68
- model_name: heavy
79
litellm_params:
8-
model: bedrock/us.anthropic.claude-sonnet-4-6
9-
aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
10-
aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
11-
aws_region_name: os.environ/AWS_DEFAULT_REGION
10+
model: openai/gpt-oss-120b
11+
api_key: os.environ/OPENAI_API_KEY
1212
model_info:
1313
priority: 1
1414
- model_name: heavy
1515
litellm_params:
16-
model: anthropic/claude-sonnet-4-6
17-
api_key: os.environ/ANTHROPIC_API_KEY
18-
model_info:
19-
priority: 2
20-
- model_name: heavy
21-
litellm_params:
22-
model: groq/llama-3.3-70b-versatile
16+
model: groq/qwen/qwen3-32b
2317
api_key: os.environ/GROQ_API_KEY
2418
model_info:
25-
priority: 3
19+
priority: 2
2620

27-
# --- medium: Haiku-class, structured extraction ---
21+
# --- medium: structured extraction (earnings-analyst, position-monitor, daily-planner) ---
2822
- model_name: medium
2923
litellm_params:
30-
model: bedrock/us.anthropic.claude-haiku-4-5-20251001-v1:0
31-
aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
32-
aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
33-
aws_region_name: os.environ/AWS_DEFAULT_REGION
24+
model: groq/qwen/qwen3-32b
25+
api_key: os.environ/GROQ_API_KEY
3426
model_info:
3527
priority: 1
3628
- model_name: medium
3729
litellm_params:
38-
model: anthropic/claude-haiku-4-5
39-
api_key: os.environ/ANTHROPIC_API_KEY
30+
model: openai/gpt-oss-120b
31+
api_key: os.environ/OPENAI_API_KEY
4032
model_info:
4133
priority: 2
4234

43-
# --- light: cheapest Haiku, simple coordination ---
35+
# --- light: simple coordination (supervisor, health-monitor) ---
4436
- model_name: light
4537
litellm_params:
46-
model: bedrock/us.anthropic.claude-haiku-4-5-20251001-v1:0
47-
aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
48-
aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
49-
aws_region_name: os.environ/AWS_DEFAULT_REGION
38+
model: groq/qwen/qwen3-32b
39+
api_key: os.environ/GROQ_API_KEY
5040
model_info:
5141
priority: 1
52-
- model_name: light
53-
litellm_params:
54-
model: anthropic/claude-haiku-4-5
55-
api_key: os.environ/ANTHROPIC_API_KEY
56-
model_info:
57-
priority: 2
5842

5943
# --- bulk: OPRO/TextGrad loops, cost-sensitive ---
6044
- model_name: bulk
6145
litellm_params:
62-
model: groq/llama-3.3-70b-versatile
46+
model: groq/qwen/qwen3-32b
6347
api_key: os.environ/GROQ_API_KEY
6448
model_info:
6549
priority: 1
66-
- model_name: bulk
67-
litellm_params:
68-
model: bedrock/us.anthropic.claude-haiku-4-5-20251001-v1:0
69-
aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
70-
aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
71-
aws_region_name: os.environ/AWS_DEFAULT_REGION
72-
model_info:
73-
priority: 2
7450

7551
# --- embedding: local ---
7652
- model_name: embedding

src/quantstack/alpha_discovery/hypothesis_agent.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
- Returns [] on timeout, Groq failure, or JSON parse failure — never raises.
1414
- All returned candidates pass schema validation before being returned.
1515
Malformed rules are dropped silently, not propagated to CandidateFilter.
16-
- Uses groq/llama-3.3-70b-versatile via LiteLLM — same model as GroqPM/SentimentCollector.
16+
- Uses groq/qwen/qwen3-32b via LiteLLM for hypothesis generation.
1717
- Temperature=0 for deterministic structured output.
1818
- max_hypotheses=5 caps token cost per call regardless of prompt response.
1919

src/quantstack/checkpointing.py

Lines changed: 55 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
"""Durable checkpoint management for LangGraph StateGraphs.
22
3-
Provides a PostgresSaver-backed checkpointer factory that enables crash
3+
Provides an AsyncPostgresSaver-backed checkpointer factory that enables crash
44
recovery. Each graph runner gets a shared checkpointer backed by a dedicated
5-
psycopg3 connection pool (separate from the application pool in db.py).
5+
psycopg3 async connection pool (separate from the application pool in db.py).
66
77
Connection budget:
88
Main pool (db.py): max 20
@@ -18,33 +18,71 @@
1818
logger = logging.getLogger(__name__)
1919

2020

21-
def create_checkpointer():
22-
"""Create a PostgresSaver backed by a dedicated psycopg3 connection pool.
21+
def _get_pg_url() -> str:
22+
return os.getenv("TRADER_PG_URL", "postgresql://localhost/quantstack")
2323

24-
Pool is sized for checkpoint operations: min_size=2, max_size=6.
25-
This is intentionally smaller than the main application pool (max_size=20)
26-
because checkpoint writes are less frequent than application queries.
2724

28-
setup() is NOT called here. Table creation is a deployment step,
29-
not a per-startup step. See setup_checkpoint_tables().
25+
def _run_checkpoint_setup() -> None:
26+
"""Create checkpoint tables using a sync autocommit connection.
27+
28+
Uses the sync PostgresSaver.MIGRATIONS list directly since
29+
CREATE INDEX CONCURRENTLY requires autocommit mode.
3030
"""
31+
import psycopg
3132
from langgraph.checkpoint.postgres import PostgresSaver
32-
from psycopg_pool import ConnectionPool
33+
from psycopg.rows import dict_row
3334

34-
pg_url = os.getenv(
35-
"TRADER_PG_URL",
36-
f"postgresql://localhost/quantstack",
37-
)
35+
pg_url = _get_pg_url()
36+
migrations = PostgresSaver.MIGRATIONS
37+
38+
with psycopg.connect(pg_url, autocommit=True, row_factory=dict_row) as conn:
39+
with conn.cursor() as cur:
40+
cur.execute(migrations[0])
41+
results = cur.execute(
42+
"SELECT v FROM checkpoint_migrations ORDER BY v DESC LIMIT 1"
43+
)
44+
row = results.fetchone()
45+
version = -1 if row is None else row["v"]
46+
for v, migration in zip(
47+
range(version + 1, len(migrations)),
48+
migrations[version + 1:],
49+
strict=False,
50+
):
51+
cur.execute(migration)
52+
cur.execute(
53+
"INSERT INTO checkpoint_migrations (v) VALUES (%s)", (v,)
54+
)
55+
logger.info("PostgresSaver checkpoint tables ready")
56+
57+
58+
async def create_checkpointer():
59+
"""Create an AsyncPostgresSaver backed by a dedicated async connection pool.
60+
61+
Pool is sized for checkpoint operations: min_size=2, max_size=6.
62+
Tables are created synchronously before the async pool is opened.
63+
"""
64+
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
65+
from psycopg_pool import AsyncConnectionPool
66+
67+
pg_url = _get_pg_url()
68+
69+
# Ensure tables exist (sync, autocommit — safe for CREATE INDEX CONCURRENTLY)
70+
try:
71+
_run_checkpoint_setup()
72+
except Exception as exc:
73+
logger.warning("Checkpoint table setup failed (may already exist): %s", exc)
3874

39-
pool = ConnectionPool(
75+
pool = AsyncConnectionPool(
4076
conninfo=pg_url,
4177
min_size=2,
4278
max_size=6,
4379
max_lifetime=3600,
4480
max_idle=600,
81+
open=False,
4582
)
83+
await pool.open()
4684

47-
return PostgresSaver(pool)
85+
return AsyncPostgresSaver(pool)
4886

4987

5088
def setup_checkpoint_tables() -> None:
@@ -53,9 +91,7 @@ def setup_checkpoint_tables() -> None:
5391
Run once as a deployment/migration step, not on every startup.
5492
Safe to call multiple times (idempotent CREATE IF NOT EXISTS).
5593
"""
56-
checkpointer = create_checkpointer()
57-
checkpointer.setup()
58-
logger.info("PostgresSaver checkpoint tables created/verified")
94+
_run_checkpoint_setup()
5995

6096

6197
def prune_old_checkpoints(retention_hours: int = 48) -> int:

0 commit comments

Comments (0)