Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 78 additions & 11 deletions agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,26 @@
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace", line_buffering=True)

from client import create_client
from progress import count_passing_tests, has_features, print_progress_summary, print_session_header
from progress import (
count_passing_tests,
has_features,
print_progress_summary,
print_session_header,
)
from prompts import (
copy_spec_to_project,
get_coding_prompt,
get_initializer_prompt,
get_single_feature_prompt,
get_testing_prompt,
)
from rate_limit_utils import (
calculate_error_backoff,
calculate_rate_limit_backoff,
clamp_retry_delay,
is_rate_limit_error,
parse_retry_after,
)

# Configuration
AUTO_CONTINUE_DELAY_SECONDS = 3
Expand Down Expand Up @@ -106,8 +118,19 @@ async def run_agent_session(
return "continue", response_text

except Exception as e:
print(f"Error during agent session: {e}")
return "error", str(e)
error_str = str(e)
print(f"Error during agent session: {error_str}")

# Detect rate limit errors from exception message
if is_rate_limit_error(error_str):
# Try to extract retry-after time from error
retry_seconds = parse_retry_after(error_str)
if retry_seconds is not None:
return "rate_limit", str(retry_seconds)
else:
return "rate_limit", "unknown"

return "error", error_str


async def run_autonomous_agent(
Expand Down Expand Up @@ -183,6 +206,8 @@ async def run_autonomous_agent(

# Main loop
iteration = 0
rate_limit_retries = 0 # Track consecutive rate limit errors for exponential backoff
error_retries = 0 # Track consecutive non-rate-limit errors

while True:
iteration += 1
Expand Down Expand Up @@ -250,13 +275,28 @@ async def run_autonomous_agent(

# Handle status
if status == "continue":
# Reset error retries on success; rate-limit retries reset only if no signal
error_retries = 0
reset_rate_limit_retries = True

delay_seconds = AUTO_CONTINUE_DELAY_SECONDS
target_time_str = None

if "limit reached" in response.lower():
print("Claude Agent SDK indicated limit reached.")
# Check for rate limit indicators in response text
if is_rate_limit_error(response):
print("Claude Agent SDK indicated rate limit reached.")
reset_rate_limit_retries = False

# Try to extract retry-after from response text first
retry_seconds = parse_retry_after(response)
if retry_seconds is not None:
delay_seconds = clamp_retry_delay(retry_seconds)
else:
# Use exponential backoff when retry-after unknown
delay_seconds = calculate_rate_limit_backoff(rate_limit_retries)
rate_limit_retries += 1

# Try to parse reset time from response
# Try to parse reset time from response (more specific format)
match = re.search(
r"(?i)\bresets(?:\s+at)?\s+(\d+)(?::(\d+))?\s*(am|pm)\s*\(([^)]+)\)",
response,
Expand Down Expand Up @@ -285,9 +325,7 @@ async def run_autonomous_agent(
target += timedelta(days=1)

delta = target - now
delay_seconds = min(
delta.total_seconds(), 24 * 60 * 60
) # Clamp to 24 hours max
delay_seconds = clamp_retry_delay(int(delta.total_seconds()))
target_time_str = target.strftime("%B %d, %Y at %I:%M %p %Z")

except Exception as e:
Expand Down Expand Up @@ -324,12 +362,41 @@ async def run_autonomous_agent(
print(f"\nSingle-feature mode: Feature #{feature_id} session complete.")
break

# Reset rate limit retries only if no rate limit signal was detected
if reset_rate_limit_retries:
rate_limit_retries = 0

await asyncio.sleep(delay_seconds)

elif status == "rate_limit":
# Smart rate limit handling with exponential backoff
# Reset error counter so mixed events don't inflate delays
error_retries = 0
if response != "unknown":
try:
delay_seconds = clamp_retry_delay(int(response))
except (ValueError, TypeError):
# Malformed value - fall through to exponential backoff
response = "unknown"
if response == "unknown":
# Use exponential backoff when retry-after unknown or malformed
delay_seconds = calculate_rate_limit_backoff(rate_limit_retries)
rate_limit_retries += 1
print(f"\nRate limit hit. Backoff wait: {delay_seconds} seconds (attempt #{rate_limit_retries})...")
else:
print(f"\nRate limit hit. Waiting {delay_seconds} seconds before retry...")

await asyncio.sleep(delay_seconds)

elif status == "error":
# Non-rate-limit errors: linear backoff capped at 5 minutes
# Reset rate limit counter so mixed events don't inflate delays
rate_limit_retries = 0
error_retries += 1
delay_seconds = calculate_error_backoff(error_retries)
print("\nSession encountered an error")
print("Will retry with a fresh session...")
await asyncio.sleep(AUTO_CONTINUE_DELAY_SECONDS)
print(f"Will retry in {delay_seconds}s (attempt #{error_retries})...")
await asyncio.sleep(delay_seconds)

# Small delay between sessions
if max_iterations is None or iteration < max_iterations:
Expand Down
124 changes: 124 additions & 0 deletions rate_limit_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
"""
Rate Limit Utilities
====================

Shared utilities for detecting and handling API rate limits.
Used by both agent.py (production) and test_rate_limit_utils.py (tests).
"""

import re
from typing import Optional

# Regex patterns for rate limit detection (used in both exception messages and response text)
# Word boundaries keep short tokens like "429" from matching inside unrelated
# text such as "PR #429" or a version string like "v14.29.0".
RATE_LIMIT_REGEX_PATTERNS = [
    r"\brate[_\s]?limit",  # "rate limit", "rate_limit", "ratelimit"
    r"\btoo\s+many\s+requests",  # "too many requests"
    r"\bhttp\s*429\b",  # "http 429", "http429"
    r"\bstatus\s*429\b",  # "status 429", "status429"
    r"\berror\s*429\b",  # "error 429", "error429"
    r"\b429\s+too\s+many",  # "429 too many"
    r"\boverloaded\b",  # "overloaded" (server-side capacity errors)
    r"\bquota\s*exceeded\b",  # "quota exceeded"
]

# Single alternation compiled once at import time; is_rate_limit_error()
# reuses it on every call instead of recompiling per message.
_RATE_LIMIT_REGEX = re.compile(
    "|".join(RATE_LIMIT_REGEX_PATTERNS),
    re.IGNORECASE
)


def parse_retry_after(error_message: str) -> Optional[int]:
    """
    Extract a retry-after duration, in seconds, from an error message.

    Recognized formats include:
    - "Retry-After: 60"
    - "retry after 60 seconds"
    - "try again in 5 seconds"
    - "30 seconds remaining"

    Args:
        error_message: The error message to parse

    Returns:
        Seconds to wait, or None if not parseable.
    """
    # Every pattern either demands an explicit seconds unit ("seconds"/"s")
    # or requires the number to end the sentence/string. That way a value
    # followed by a different unit ("30 minutes", "1 hour") never gets
    # misread as a number of seconds.
    candidate_patterns = (
        r"retry.?after[:\s]+(\d+)\s*(?:seconds?|s\b)",   # explicit seconds unit
        r"retry.?after[:\s]+(\d+)(?:\s*$|\s*[,.])",      # or end of string/sentence
        r"try again in\s+(\d+)\s*(?:seconds?|s\b)",      # explicit seconds unit
        r"try again in\s+(\d+)(?:\s*$|\s*[,.])",         # or end of string/sentence
        r"(\d+)\s*seconds?\s*(?:remaining|left|until)",
    )

    for candidate in candidate_patterns:
        found = re.search(candidate, error_message, re.IGNORECASE)
        if found is not None:
            return int(found.group(1))

    return None


def is_rate_limit_error(error_message: str) -> bool:
    """
    Report whether an error message looks like an API rate limit.

    Matching is delegated to the module-level compiled regex, whose
    word-boundary patterns avoid false positives such as "PR #429",
    "please wait while I...", or "Node v14.29.0".

    Args:
        error_message: The error message to check

    Returns:
        True if the message indicates a rate limit, False otherwise.
    """
    return _RATE_LIMIT_REGEX.search(error_message) is not None


def calculate_rate_limit_backoff(retries: int) -> int:
    """
    Compute the exponential backoff delay for consecutive rate-limit hits.

    A 60-second base doubles on every retry and the result is capped at
    one hour: 60s, 120s, 240s, 480s, 960s, 1920s, then 3600s onward.

    Args:
        retries: Number of consecutive rate limit retries (0-indexed)

    Returns:
        Delay in seconds (clamped to 1-3600 range)
    """
    raw_delay = 60 * (2 ** retries)
    if raw_delay < 1:
        raw_delay = 1
    elif raw_delay > 3600:
        raw_delay = 3600
    return int(raw_delay)


def calculate_error_backoff(retries: int) -> int:
    """
    Compute the linear backoff delay for non-rate-limit errors.

    The delay grows by 30 seconds per consecutive failure and is capped
    at five minutes: 30s, 60s, 90s, 120s, ... 300s.

    Args:
        retries: Number of consecutive error retries (1-indexed)

    Returns:
        Delay in seconds (clamped to 1-300 range)
    """
    delay = 30 * retries
    if delay > 300:
        return 300
    if delay < 1:
        return 1
    return delay


def clamp_retry_delay(delay_seconds: int) -> int:
    """
    Constrain a retry delay to the safe range of 1-3600 seconds.

    Args:
        delay_seconds: The raw delay value

    Returns:
        Delay clamped to 1-3600 seconds
    """
    if delay_seconds < 1:
        return 1
    if delay_seconds > 3600:
        return 3600
    return delay_seconds
Loading
Loading