35 changes: 18 additions & 17 deletions pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "replicantx"
version = "0.1.9"
version = "0.1.10"
description = "End-to-end testing harness for AI agents via web service API"
readme = "README.md"
requires-python = ">=3.11"
@@ -14,7 +14,7 @@ authors = [
]
keywords = ["ai", "agent", "testing", "e2e", "api"]
classifiers = [
"Development Status :: 3 - Alpha",
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.11",
@@ -23,20 +23,21 @@ classifiers = [
]

dependencies = [
"pydantic>=2.7",
"typer>=0.12.0",
"httpx>=0.27.0",
"PyYAML>=6.0",
"supabase>=2.0.0",
"jinja2>=3.1.0",
"rich>=13.0.0",
"pydantic-ai>=0.3.0",
"python-dotenv>=1.0.0",
"pydantic>=2.11.7",
"typer>=0.16.0",
"httpx>=0.28.1",
"PyYAML>=6.0.2",
"supabase>=2.18.0",
"jinja2>=3.1.6",
"rich>=14.1.0",
"pydantic-ai>=0.6.2",
"python-dotenv>=1.1.1",
"typing-extensions>=4.14.1",
]

[project.optional-dependencies]
cli = [
"typer[all]>=0.12.0",
"typer[all]>=0.16.0",
]
dev = [
"pytest>=8.0.0",
@@ -48,15 +49,15 @@ dev = [
# Note: PydanticAI is included in core dependencies and handles all LLM providers
# Optional providers can be installed separately based on PydanticAI documentation
openai = [
"openai>=1.0.0",
"openai>=1.99.3",
]
anthropic = [
"anthropic>=0.34.0",
"anthropic>=0.61.0",
]
all = [
"typer[all]>=0.12.0",
"openai>=1.0.0",
"anthropic>=0.34.0",
"typer[all]>=0.16.0",
"openai>=1.99.3",
"anthropic>=0.61.0",
]

[project.scripts]
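The dependency floors above are raised across the board (for example pydantic 2.7 → 2.11.7 and pydantic-ai 0.3.0 → 0.6.2). A quick way to see whether an existing environment already satisfies the new minimums is to compare installed versions against those floors; the snippet below is an illustrative sketch using only the standard library, not part of this PR, and its version parsing is deliberately naive.

# Illustrative check (not part of this PR): does the active environment meet the
# raised dependency floors from pyproject.toml?
from importlib.metadata import PackageNotFoundError, version

FLOORS = {
    "pydantic": "2.11.7",
    "typer": "0.16.0",
    "httpx": "0.28.1",
    "pydantic-ai": "0.6.2",
    "rich": "14.1.0",
}

def as_tuple(v: str) -> tuple:
    # Naive numeric parse; adequate for the plain X.Y.Z floors listed above.
    return tuple(int(part) for part in v.split(".")[:3] if part.isdigit())

for package, floor in FLOORS.items():
    try:
        installed = version(package)
    except PackageNotFoundError:
        print(f"{package}: not installed (needs >= {floor})")
        continue
    status = "ok" if as_tuple(installed) >= as_tuple(floor) else f"needs >= {floor}"
    print(f"{package}: {installed} ({status})")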
6 changes: 3 additions & 3 deletions replicantx/cli.py
@@ -73,7 +73,7 @@ def run(
False, "--ci", help="CI mode: exit with non-zero code if any tests fail"
),
verbose: bool = typer.Option(
False, "--verbose", "-v", help="Enable verbose output"
False, "--verbose", help="Enable verbose output"
),
debug: bool = typer.Option(
False, "--debug", help="Enable debug mode: Shows detailed technical information including HTTP client setup, request payloads, response validation, AI processing, and assertion results. Perfect for troubleshooting failed tests and performance analysis."
@@ -266,7 +266,7 @@ async def run_scenarios_sequential(
if config.level == TestLevel.BASIC:
runner = BasicScenarioRunner(config, debug=debug, watch=watch)
elif config.level == TestLevel.AGENT:
runner = AgentScenarioRunner(config, debug=debug, watch=watch)
runner = AgentScenarioRunner(config, debug=debug, watch=watch, verbose=verbose)
else:
raise ValueError(f"Unsupported test level: {config.level}")

@@ -400,7 +400,7 @@ async def _execute_scenario(
if config.level == TestLevel.BASIC:
runner = BasicScenarioRunner(config, debug=debug, watch=watch)
elif config.level == TestLevel.AGENT:
runner = AgentScenarioRunner(config, debug=debug, watch=watch)
runner = AgentScenarioRunner(config, debug=debug, watch=watch, verbose=verbose)
else:
raise ValueError(f"Unsupported test level: {config.level}")

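The net effect of the cli.py changes is that the existing --verbose flag (which loses its -v shorthand here) is now forwarded to the Level 2 runner only. Below is a minimal sketch of that wiring, simplified from the two hunks above; the import paths for ScenarioConfig, TestLevel, and BasicScenarioRunner are assumptions, and only the runner construction mirrors the diff.

# Simplified sketch of runner selection after this change; import paths other
# than replicantx.scenarios.agent are assumed, not taken from the diff.
from replicantx.models import ScenarioConfig, TestLevel            # assumed path
from replicantx.scenarios.agent import AgentScenarioRunner
from replicantx.scenarios.basic import BasicScenarioRunner         # assumed path

def build_runner(config: ScenarioConfig, debug: bool, watch: bool, verbose: bool):
    if config.level == TestLevel.BASIC:
        # The Level 1 runner is untouched by this PR and does not take verbose.
        return BasicScenarioRunner(config, debug=debug, watch=watch)
    if config.level == TestLevel.AGENT:
        # Only the agent runner receives the new flag.
        return AgentScenarioRunner(config, debug=debug, watch=watch, verbose=verbose)
    raise ValueError(f"Unsupported test level: {config.level}")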
8 changes: 5 additions & 3 deletions replicantx/scenarios/agent.py
@@ -27,18 +27,20 @@
class AgentScenarioRunner:
"""Runner for Replicant agent-driven (Level 2) test scenarios."""

def __init__(self, config: ScenarioConfig, debug: bool = False, watch: bool = False):
def __init__(self, config: ScenarioConfig, debug: bool = False, watch: bool = False, verbose: bool = False):
"""Initialize the agent scenario runner.

Args:
config: Scenario configuration with Replicant agent setup
debug: Enable debug mode with technical details
watch: Enable watch mode for real-time monitoring
verbose: Enable verbose output for system prompts
"""
self.config = config
self.debug = debug
self.watch = watch
self.console = Console() if (debug or watch) else None
self.verbose = verbose
self.console = Console() if (debug or watch or verbose) else None
self.auth_provider = self._create_auth_provider()
self.http_client: Optional[HTTPClient] = None
self.replicant_agent: Optional[ReplicantAgent] = None
@@ -147,7 +149,7 @@ async def run(self) -> ScenarioReport:
})

# Initialize Replicant agent
self.replicant_agent = ReplicantAgent.create(self.config.replicant)
self.replicant_agent = ReplicantAgent.create(self.config.replicant, verbose=self.verbose)

current_datetime = datetime.now()
date_str = current_datetime.strftime("%A, %B %d, %Y")
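With the constructor change above, the Rich Console is now created whenever any of debug, watch, or verbose is set, and the verbose flag is handed straight to ReplicantAgent.create. A hedged usage sketch follows; how the ScenarioConfig gets loaded is left open, and only the runner call itself reflects the diff.

# Usage sketch: running a Level 2 scenario with verbose prompt echoing.
import asyncio
from replicantx.scenarios.agent import AgentScenarioRunner

async def run_with_verbose(config) -> None:
    # config: a ScenarioConfig whose level is AGENT, loaded elsewhere.
    runner = AgentScenarioRunner(config, debug=False, watch=False, verbose=True)
    # verbose=True means the runner builds a Console and calls
    # ReplicantAgent.create(config.replicant, verbose=True), so every system
    # prompt and goal-evaluation prompt is printed before the LLM call.
    report = await runner.run()
    print(report)

# asyncio.run(run_with_verbose(config))  # once a ScenarioConfig has been loaded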
43 changes: 36 additions & 7 deletions replicantx/scenarios/replicant.py
@@ -39,9 +39,10 @@ class GoalEvaluator(BaseModel):
model_name: Optional[str] = Field(None, description="Model for intelligent evaluation")
custom_prompt: Optional[str] = Field(None, description="Custom evaluation prompt")
completion_keywords: List[str] = Field(..., description="Keywords for keyword-based evaluation")
verbose: bool = Field(False, description="Enable verbose output for system prompts")

@classmethod
def create(cls, config: ReplicantConfig) -> "GoalEvaluator":
def create(cls, config: ReplicantConfig, verbose: bool = False) -> "GoalEvaluator":
"""Create a GoalEvaluator from ReplicantConfig.

Args:
@@ -56,7 +57,8 @@ def create(cls, config: ReplicantConfig) -> "GoalEvaluator":
mode=config.goal_evaluation_mode,
model_name=model_name,
custom_prompt=config.goal_evaluation_prompt,
completion_keywords=config.completion_keywords
completion_keywords=config.completion_keywords,
verbose=verbose
)

async def evaluate_goal_completion(
@@ -146,12 +148,24 @@ async def _evaluate_with_llm(

# Create LLM agent for evaluation
model = infer_model(self.model_name)
# Only include max_tokens for evaluation - don't set temperature to avoid compatibility issues
agent = PydanticAgent(
model=model,
instructions="You are an expert at evaluating whether conversation goals have been achieved. Be precise and analytical.",
model_settings={"temperature": 0.1, "max_tokens": 200} # Low temperature for consistency
model_settings={"max_tokens": 1000} # Only include max_tokens, skip temperature for compatibility
)

# Verbose logging of the goal evaluation prompt
if self.verbose:
print("\n" + "="*80)
print("🔍 VERBOSE: GOAL EVALUATION PROMPT SENT TO PYDANTICAI")
print("="*80)
print(f"Model: {self.model_name}")
print(f"Model Settings: {{'max_tokens': 200}}")
print(f"Instructions: You are an expert at evaluating whether conversation goals have been achieved. Be precise and analytical.")
print(f"Prompt: {prompt}")
print("="*80 + "\n")

# Get evaluation
result = await agent.run(prompt)
response = result.output.strip()
@@ -327,6 +341,7 @@ class ResponseGenerator(BaseModel):
system_prompt: str = Field(..., description="System prompt for response generation")
model_settings: Dict[str, Any] = Field(default_factory=dict, description="Model settings")
facts: Dict[str, Any] = Field(..., description="Available facts")
verbose: bool = Field(False, description="Enable verbose output for system prompts")

def _create_agent(self) -> PydanticAgent:
"""Create a PydanticAI agent instance."""
@@ -374,6 +389,18 @@ async def generate_response(self, api_message: str, conversation_state: Conversa

# Create and use PydanticAI agent
agent = self._create_agent()

# Verbose logging of the complete system prompt
if self.verbose:
print("\n" + "="*80)
print("🔍 VERBOSE: COMPLETE SYSTEM PROMPT SENT TO PYDANTICAI")
print("="*80)
print(f"Model: {self.model_name}")
print(f"Model Settings: {self.model_settings}")
print(f"System Prompt: {self.system_prompt}")
print(f"Context: {context}")
print("="*80 + "\n")

result = await agent.run(context)

return result.output
@@ -418,16 +445,17 @@ class ReplicantAgent(BaseModel):
goal_evaluator: GoalEvaluator = Field(..., description="Goal evaluation utility")

@classmethod
def create(cls, config: ReplicantConfig) -> "ReplicantAgent":
def create(cls, config: ReplicantConfig, verbose: bool = False) -> "ReplicantAgent":
"""Create a new Replicant agent.

Args:
config: Replicant configuration
verbose: Enable verbose output for system prompts

Returns:
Configured Replicant agent
"""
# Build model settings
# Build model settings - only include parameters that are explicitly provided
model_settings = {}
if config.llm.temperature is not None:
model_settings["temperature"] = config.llm.temperature
@@ -438,10 +466,11 @@ def create(cls, config: ReplicantConfig) -> "ReplicantAgent":
model_name=config.llm.model,
system_prompt=config.system_prompt,
model_settings=model_settings,
facts=config.facts
facts=config.facts,
verbose=verbose
)

goal_evaluator = GoalEvaluator.create(config)
goal_evaluator = GoalEvaluator.create(config, verbose=verbose)

return cls(
config=config,
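Taken together, the replicant.py changes add a verbose field to both GoalEvaluator and ResponseGenerator, thread it through the two factory methods, and drop the fixed temperature from the evaluation agent (only max_tokens is passed now, to sidestep provider compatibility issues). Below is a condensed sketch of the propagation path, trimmed to the fields this PR touches; the response_generator keyword on the final constructor call is assumed from context rather than visible in the diff.

# Condensed sketch of ReplicantAgent.create after this PR, trimmed to what the
# diff shows; only explicitly configured model settings are forwarded.
def create_replicant_agent(config, verbose=False):
    model_settings = {}
    if config.llm.temperature is not None:
        model_settings["temperature"] = config.llm.temperature   # omitted when unset

    response_generator = ResponseGenerator(
        model_name=config.llm.model,
        system_prompt=config.system_prompt,
        model_settings=model_settings,
        facts=config.facts,
        verbose=verbose,      # echoes the full system prompt and context before agent.run()
    )
    goal_evaluator = GoalEvaluator.create(config, verbose=verbose)  # echoes evaluation prompts
    return ReplicantAgent(
        config=config,
        response_generator=response_generator,  # keyword name assumed, not shown in the diff
        goal_evaluator=goal_evaluator,
    )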