Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 78 additions & 11 deletions agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,26 @@
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace", line_buffering=True)

from client import create_client
from progress import count_passing_tests, has_features, print_progress_summary, print_session_header
from progress import (
count_passing_tests,
has_features,
print_progress_summary,
print_session_header,
)
from prompts import (
copy_spec_to_project,
get_coding_prompt,
get_initializer_prompt,
get_single_feature_prompt,
get_testing_prompt,
)
from rate_limit_utils import (
calculate_error_backoff,
calculate_rate_limit_backoff,
clamp_retry_delay,
is_rate_limit_error,
parse_retry_after,
)

# Configuration
AUTO_CONTINUE_DELAY_SECONDS = 3
Expand Down Expand Up @@ -106,8 +118,19 @@ async def run_agent_session(
return "continue", response_text

except Exception as e:
print(f"Error during agent session: {e}")
return "error", str(e)
error_str = str(e)
print(f"Error during agent session: {error_str}")

# Detect rate limit errors from exception message
if is_rate_limit_error(error_str):
# Try to extract retry-after time from error
retry_seconds = parse_retry_after(error_str)
if retry_seconds is not None:
return "rate_limit", str(retry_seconds)
else:
return "rate_limit", "unknown"

return "error", error_str


async def run_autonomous_agent(
Expand Down Expand Up @@ -183,6 +206,8 @@ async def run_autonomous_agent(

# Main loop
iteration = 0
rate_limit_retries = 0 # Track consecutive rate limit errors for exponential backoff
error_retries = 0 # Track consecutive non-rate-limit errors

while True:
iteration += 1
Expand Down Expand Up @@ -250,13 +275,28 @@ async def run_autonomous_agent(

# Handle status
if status == "continue":
# Reset error retries on success; rate-limit retries reset only if no signal
error_retries = 0
reset_rate_limit_retries = True

delay_seconds = AUTO_CONTINUE_DELAY_SECONDS
target_time_str = None

if "limit reached" in response.lower():
print("Claude Agent SDK indicated limit reached.")
# Check for rate limit indicators in response text
if is_rate_limit_error(response):
print("Claude Agent SDK indicated rate limit reached.")
reset_rate_limit_retries = False

# Try to extract retry-after from response text first
retry_seconds = parse_retry_after(response)
if retry_seconds is not None:
delay_seconds = clamp_retry_delay(retry_seconds)
else:
# Use exponential backoff when retry-after unknown
delay_seconds = calculate_rate_limit_backoff(rate_limit_retries)
rate_limit_retries += 1

# Try to parse reset time from response
# Try to parse reset time from response (more specific format)
match = re.search(
r"(?i)\bresets(?:\s+at)?\s+(\d+)(?::(\d+))?\s*(am|pm)\s*\(([^)]+)\)",
response,
Expand Down Expand Up @@ -285,9 +325,7 @@ async def run_autonomous_agent(
target += timedelta(days=1)

delta = target - now
delay_seconds = min(
delta.total_seconds(), 24 * 60 * 60
) # Clamp to 24 hours max
delay_seconds = clamp_retry_delay(int(delta.total_seconds()))
target_time_str = target.strftime("%B %d, %Y at %I:%M %p %Z")

except Exception as e:
Expand Down Expand Up @@ -324,12 +362,41 @@ async def run_autonomous_agent(
print(f"\nSingle-feature mode: Feature #{feature_id} session complete.")
break

# Reset rate limit retries only if no rate limit signal was detected
if reset_rate_limit_retries:
rate_limit_retries = 0

await asyncio.sleep(delay_seconds)

elif status == "rate_limit":
# Smart rate limit handling with exponential backoff
# Reset error counter so mixed events don't inflate delays
error_retries = 0
if response != "unknown":
try:
delay_seconds = clamp_retry_delay(int(response))
except (ValueError, TypeError):
# Malformed value - fall through to exponential backoff
response = "unknown"
if response == "unknown":
# Use exponential backoff when retry-after unknown or malformed
delay_seconds = calculate_rate_limit_backoff(rate_limit_retries)
rate_limit_retries += 1
print(f"\nRate limit hit. Backoff wait: {delay_seconds} seconds (attempt #{rate_limit_retries})...")
else:
print(f"\nRate limit hit. Waiting {delay_seconds} seconds before retry...")

await asyncio.sleep(delay_seconds)

elif status == "error":
# Non-rate-limit errors: linear backoff capped at 5 minutes
# Reset rate limit counter so mixed events don't inflate delays
rate_limit_retries = 0
error_retries += 1
delay_seconds = calculate_error_backoff(error_retries)
print("\nSession encountered an error")
print("Will retry with a fresh session...")
await asyncio.sleep(AUTO_CONTINUE_DELAY_SECONDS)
print(f"Will retry in {delay_seconds}s (attempt #{error_retries})...")
await asyncio.sleep(delay_seconds)

# Small delay between sessions
if max_iterations is None or iteration < max_iterations:
Expand Down
124 changes: 124 additions & 0 deletions rate_limit_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
"""
Rate Limit Utilities
====================

Shared utilities for detecting and handling API rate limits.
Used by both agent.py (production) and test_rate_limit_utils.py (tests).
"""

import re
from typing import Optional

# Regex patterns for rate limit detection (used in both exception messages and response text)
# Word boundaries keep short tokens like "429" from matching inside unrelated
# text such as "PR #429" or a version string like "v14.29.0".
RATE_LIMIT_REGEX_PATTERNS = [
    r"\brate[_\s]?limit",  # "rate limit", "rate_limit", "ratelimit"
    r"\btoo\s+many\s+requests",  # "too many requests"
    r"\bhttp\s*429\b",  # "http 429", "http429"
    r"\bstatus\s*429\b",  # "status 429", "status429"
    r"\berror\s*429\b",  # "error 429", "error429"
    r"\b429\s+too\s+many",  # "429 too many"
    r"\boverloaded\b",  # "overloaded" (server-side capacity errors)
    r"\bquota\s*exceeded\b",  # "quota exceeded"
]

# Single alternation compiled once at import time; is_rate_limit_error()
# reuses it on every call instead of recompiling per message.
_RATE_LIMIT_REGEX = re.compile(
    "|".join(RATE_LIMIT_REGEX_PATTERNS),
    re.IGNORECASE
)


def parse_retry_after(error_message: str) -> Optional[int]:
    """
    Extract a retry-after duration, in seconds, from an error message.

    Recognized formats include:
    - "Retry-After: 60"
    - "retry after 60 seconds"
    - "try again in 5 seconds"
    - "30 seconds remaining"

    Args:
        error_message: The error message to parse

    Returns:
        Seconds to wait, or None if not parseable.
    """
    # Every pattern either demands an explicit seconds unit ("seconds"/"s")
    # or requires the number to end the sentence/string. That way a value
    # followed by a different unit ("30 minutes", "1 hour") never gets
    # misread as a number of seconds.
    candidate_patterns = (
        r"retry.?after[:\s]+(\d+)\s*(?:seconds?|s\b)",   # explicit seconds unit
        r"retry.?after[:\s]+(\d+)(?:\s*$|\s*[,.])",      # or end of string/sentence
        r"try again in\s+(\d+)\s*(?:seconds?|s\b)",      # explicit seconds unit
        r"try again in\s+(\d+)(?:\s*$|\s*[,.])",         # or end of string/sentence
        r"(\d+)\s*seconds?\s*(?:remaining|left|until)",
    )

    for candidate in candidate_patterns:
        found = re.search(candidate, error_message, re.IGNORECASE)
        if found is not None:
            return int(found.group(1))

    return None


def is_rate_limit_error(error_message: str) -> bool:
    """
    Report whether an error message looks like an API rate limit.

    Matching is delegated to the module-level compiled regex, whose
    word-boundary patterns avoid false positives such as "PR #429",
    "please wait while I...", or "Node v14.29.0".

    Args:
        error_message: The error message to check

    Returns:
        True if the message indicates a rate limit, False otherwise.
    """
    return _RATE_LIMIT_REGEX.search(error_message) is not None


def calculate_rate_limit_backoff(retries: int) -> int:
    """
    Compute the exponential backoff delay for consecutive rate-limit hits.

    A 60-second base doubles on every retry and the result is capped at
    one hour: 60s, 120s, 240s, 480s, 960s, 1920s, then 3600s onward.

    Args:
        retries: Number of consecutive rate limit retries (0-indexed)

    Returns:
        Delay in seconds (clamped to 1-3600 range)
    """
    raw_delay = 60 * (2 ** retries)
    if raw_delay < 1:
        raw_delay = 1
    elif raw_delay > 3600:
        raw_delay = 3600
    return int(raw_delay)


def calculate_error_backoff(retries: int) -> int:
    """
    Compute the linear backoff delay for non-rate-limit errors.

    The delay grows by 30 seconds per consecutive failure and is capped
    at five minutes: 30s, 60s, 90s, 120s, ... 300s.

    Args:
        retries: Number of consecutive error retries (1-indexed)

    Returns:
        Delay in seconds (clamped to 1-300 range)
    """
    delay = 30 * retries
    if delay > 300:
        return 300
    if delay < 1:
        return 1
    return delay


def clamp_retry_delay(delay_seconds: int) -> int:
    """
    Constrain a retry delay to the safe range of 1-3600 seconds.

    Args:
        delay_seconds: The raw delay value

    Returns:
        Delay clamped to 1-3600 seconds
    """
    if delay_seconds < 1:
        return 1
    if delay_seconds > 3600:
        return 3600
    return delay_seconds
Loading
Loading