-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_mini_run.py
More file actions
111 lines (87 loc) Β· 3.68 KB
/
test_mini_run.py
File metadata and controls
111 lines (87 loc) Β· 3.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env python3
"""
Mini-Run Test Script
===================
Quick test of the mini research evaluation (18 iterations instead of 225).
Perfect for testing your setup before running the full benchmark.
"""
import json
import requests
import time
from pathlib import Path
def test_mini_run_api():
    """Kick off a mini-run evaluation via the HTTP API.

    POSTs a mini-run request (``mini_run=True``) to the local SwiftSolve
    server and prints the server's summary of the scheduled evaluation.

    Returns:
        The ``evaluation_id`` string on success, or ``None`` when the
        server is unreachable or the request fails.
    """
    print("\U0001f9ea Testing Mini-Run via API")
    print("=" * 40)

    # API endpoint (assumes a locally running uvicorn server -- see the
    # hint printed in the ConnectionError branch below).
    url = "http://localhost:8000/research/evaluate"

    # Mini-run request
    payload = {
        "seeds": [42, 123, 456],
        "max_workers": 2,
        "output_dir": "test_mini_results",
        "mini_run": True  # This is the key parameter!
    }

    print(f"\U0001f4e4 Sending request to {url}")
    print(f"   Payload: {json.dumps(payload, indent=2)}")

    try:
        # Explicit timeout so a hung server cannot block the script forever
        # (requests has no default timeout).
        response = requests.post(url, json=payload, timeout=30)
        response.raise_for_status()
        result = response.json()

        print("\n\u2705 Request successful!")
        print(f"   Evaluation ID: {result['evaluation_id']}")
        print(f"   Total runs: {result['total_runs']}")
        print(f"   Estimated duration: {result['estimated_duration_minutes']:.1f} minutes")
        return result['evaluation_id']
    except requests.exceptions.ConnectionError:
        print("\u274c Connection failed. Is the API server running?")
        print("   Start it with: PYTHONPATH=src uvicorn swiftsolve.main:app --host 127.0.0.1 --port 8000")
        return None
    except Exception as e:
        # Covers HTTP errors (raise_for_status), bad JSON, timeouts, etc.
        print(f"\u274c Request failed: {e}")
        return None
def test_mini_run_direct():
"""Test mini-run directly via Python."""
print("\nπ§ͺ Testing Mini-Run via Direct Python Call")
print("=" * 40)
from src.swiftsolve.research.evaluation import ResearchEvaluationRunner
import os
# Check API keys
openai_key = os.getenv('OPENAI_API_KEY')
anthropic_key = os.getenv('ANTHROPIC_API_KEY')
print("π API Key Status:")
print(f" OPENAI_API_KEY: {'β
Set' if openai_key else 'β Missing'}")
print(f" ANTHROPIC_API_KEY: {'β
Set' if anthropic_key else 'β Missing'}")
if not (openai_key and anthropic_key):
print("\nβ οΈ API keys required for evaluation. Set them with:")
print(" export OPENAI_API_KEY='your_key_here'")
print(" export ANTHROPIC_API_KEY='your_key_here'")
return
print("\nπ Initializing mini-run...")
runner = ResearchEvaluationRunner(Path("test_mini_results_direct"), max_workers=1)
print("π Starting MINI evaluation (2 tasks, 18 iterations)...")
print(" This will make actual API calls!")
try:
analysis = runner.run_full_evaluation(seeds=[42], mini_run=True)
print(f"β
Mini evaluation completed!")
print(f" Success rate: {analysis['evaluation_summary']['success_rate_percent']:.1f}%")
print(f" Total runs: {analysis['evaluation_summary']['total_runs']}")
return analysis
except Exception as e:
print(f"β Mini evaluation failed: {e}")
return None
if __name__ == "__main__":
print("π― SwiftSolve Mini-Run Test")
print("="*50)
print("Mini-run: 2 tasks Γ 3 seeds Γ 3 replans = 18 iterations")
print("Full-run: 25 tasks Γ 3 seeds Γ 3 replans = 225 iterations")
print("Speed up: 12.5x faster! π")
print()
# Test API first
evaluation_id = test_mini_run_api()
# Test direct call
test_mini_run_direct()
if evaluation_id:
print(f"\nπ Your mini evaluation is running with ID: {evaluation_id}")
print(f" Check results in: test_mini_results/{evaluation_id}/")
print(f" Monitor logs for progress updates")