christopherwoodall · christopherwoodall · Jul 17, 2025 · Jul 16, 2025 · Jul 16, 2025 · Jul 16, 2025
diff --git a/.github/README.md b/.github/README.md
@@ -78,6 +78,8 @@ source-agent \
 source-agent --interactive
 ```
 
+![Source Agent example session](docs/example3.gif)
+
 ---
 
 ## Supported Providers
@@ -111,7 +113,7 @@ Source Agent provides these built-in tools for code analysis:
 - **directory_delete_tool** - Safely delete directories (recursive option available)
 - **calculate_expression** - Evaluate mathematical expressions (supports sqrt, pi, etc.)
 - **web_search_tool** - Search the web using DuckDuckGo (returns snippets and optional page content)
-- **task_mark_complete** - REQUIRED tool to signal task completion and exit the agent loop
+- **msg_complete_tool** - REQUIRED tool to signal task completion and exit the agent loop
 
 These tools are automatically available to the AI agent during analysis.
 

diff --git a/.github/docs/example3.gif b/.github/docs/example3.gif
diff --git a/AGENTS.md b/AGENTS.md
@@ -29,9 +29,9 @@ Simulate three internal specialists:
 ## Output Format
 - Output executable code first.
 - Include minimal, relevant explanation if necessary.
-- When you have fully satisfied the user's request and provided a complete answer,
-  you MUST call the `msg_task_complete` tool with a summary of what was accomplished and a final message for the user. This signals that the task is finished.
 - Debrief the user before marking the task complete, ensuring they understand the changes made and any implications.
+- When you have completed your thoughts or have nothing further to add, you MUST call
+  the `msg_complete_tool`. This signals that the task is finished.
 
 ## Important Files
 - `pyproject.toml`: Project metadata and dependencies.

diff --git a/Makefile b/Makefile
@@ -61,6 +61,7 @@ bandit: ## Run bandit
 .PHONY: test
 test: ## Run the tests
 -	pytest -s
+-	pytest --cov=src
 
 
 .PHONY: test-tools

diff --git a/pyproject.toml b/pyproject.toml
@@ -9,12 +9,18 @@ heavy-agent = "source_agent.heavy:main"
 
 [project]
 requires-python = ">=3.10"
-version = "0.0.12"
+version = "0.0.13"
 name        = "source-agent"
 description = "Simple coding agent."
 readme      = ".github/README.md"
 license     = "MIT"
 
+classifiers = [
+    "Development Status :: 1 - Planning",
+    "Environment :: Console",
+    "Programming Language :: Python :: 3",
+]
+
 dependencies = [
     "beautifulsoup4",
     "ddgs",
@@ -31,6 +37,7 @@ developer = [
     "hatch",
     "mypy",
     "pytest",
+    "pytest-cov",
     "ruff",
 ]
 
@@ -117,7 +124,8 @@ exclude = []
 [tool.pytest.ini_options]
 pythonpath = ["src"]
 testpaths = ["tests"]
-addopts = "-v"
+addopts = "-ra -q -v"
+python_files = "test_*.py"
 
 
 # General scanning options

diff --git a/src/source_agent/__init__.py b/src/source_agent/__init__.py
@@ -2,9 +2,9 @@
 # Configure clean imports for the package
 # See: https://hynek.me/articles/testing-packaging/
 
-from . import tools, agents
+from . import tools, agents, providers
 from .tools import tool_registry
 from .agents import code
 
 
-__all__ = ["agents", "code", "tools", "tool_registry"]
+__all__ = ["agents", "code", "tools", "tool_registry", "providers"]
diff --git a/src/source_agent/agents/code.py b/src/source_agent/agents/code.py
@@ -1,3 +1,4 @@
+import re
 import json
 import time
 import openai
@@ -7,93 +8,117 @@
 
 
 class CodeAgent:
+    DEFAULT_SYSTEM_PROMPT_PATH = "AGENTS.md"
+    MAX_STEPS = 12
+    MAX_RETRIES = 3
+    BACKOFF_BASE = 1.0
+    BACKOFF_FACTOR = 2.0
+    MAX_BACKOFF = 60.0
+
     def __init__(
         self,
         api_key=None,
         base_url=None,
         model=None,
         temperature=0.3,
+        system_prompt: str = None,
     ):
         self.api_key = api_key
         self.base_url = base_url
         self.model = model
-
         self.temperature = temperature
-        # self.top_p = 0.98
-        # self.frequency_penalty = 0.0005
-        # self.presence_penalty = 0.0005
 
+        self.system_prompt = system_prompt or Path(
+            self.DEFAULT_SYSTEM_PROMPT_PATH
+        ).read_text(encoding="utf-8")
         self.messages = []
-        self.system_prompt = Path("AGENTS.md").read_text(encoding="utf-8")
+        self.reset_conversation()
 
-        self.messages.append({"role": "system", "content": self.system_prompt})
-
-        # Load tools from the registry
         self.tools = source_agent.tools.tool_registry.registry.get_tools()
         self.tool_mapping = source_agent.tools.tool_registry.registry.get_mapping()
 
-        # Initialize session
         self.session = openai.OpenAI(
             base_url=self.base_url,
             api_key=self.api_key,
         )
 
-    def run(self, user_prompt: str = None, max_steps: int = 50):
+    def reset_conversation(self):
+        """Clear conversation and initialize with system prompt."""
+        self.messages = [{"role": "system", "content": self.system_prompt}]
+
+    def run(self, user_prompt: str = None, max_steps: int = None):
         """
-        If user_prompt is provided, seed it;
-        otherwise assume messages already has the last user turn.
-        Then run the full react loop to completion.
+        Run a full ReAct-style loop with tool usage.
+
+        Args:
+            user_prompt: Optional user input to start the conversation.
+            max_steps: Maximum steps before stopping.
         """
-        if user_prompt is not None:
+        if user_prompt:
             self.messages.append({"role": "user", "content": user_prompt})
 
-        for step in range(max_steps):
-            print(f"🔄 Agent iteration {step}/{max_steps}")
+        steps = max_steps or self.MAX_STEPS
+
+        for step in range(1, steps + 1):
+            print(f"🔄 Iteration {step}/{steps}")
             response = self.call_llm(self.messages)
-            choice = response.choices[0]
-            message = choice.message
+
+            message = response.choices[0].message
             self.messages.append(message)
-            print("🤖 Agent:", message.content)
+
+            parsed_content = self.parse_response_message(message.content)
+            if parsed_content:
+                print("🤖 Agent:", parsed_content)
 
             if message.tool_calls:
                 for tool_call in message.tool_calls:
-                    print(f"🔧 Calling: {tool_call.function.name}")
-                    # print(f"📝 Args: {tool_call.function.arguments}")
+                    tool_name = tool_call.function.name
+                    print(f"🔧 Calling: {tool_name}")
+
+                    if tool_name == "msg_complete_tool":
+                        print("💯 Task marked complete!\n")
+                        return
 
                     result = self.handle_tool_call(tool_call)
                     self.messages.append(result)
 
-                    # print("✅ Result:", result)
+            print("-" * 40 + "\n")
 
-                    # # TODO - Better message handling
-                    # if tool_call.function.name == "msg_final_answer":
-                    #     print("✅ Final answer received!")
-                    #     return result
+        return {"error": "Max steps reached without task completion."}
 
-                    if tool_call.function.name == "msg_task_complete":
-                        print("💯 Task marked complete!")
-                        return result
-            # else:
-            #     # print("💭 No tools; continuing")
-            #     pass
+    def parse_response_message(self, message: str) -> str:
+        """
+        Extracts clean user-facing content from a model response.
+        Looks for a single-quoted dict snippet containing 'type': 'text'.
+        """
+        pattern = r"(\{[^}]*'type'\s*:\s*'text'[^}]*\})"
+        match = re.search(pattern, message, re.DOTALL)
 
-            print("\n" + "-" * 40 + "\n")
+        if match:
+            try:
+                message = match.group(0).replace("'", '"')
+                return json.loads(message).get("text", "").strip()
+            except json.JSONDecodeError:
+                pass
 
-        # print("🚨 Max steps reached without task completion.")
-        return {"error": "Max steps reached without task completion."}
+        return message.strip()
 
     def handle_tool_call(self, tool_call):
+        """Execute the named tool with arguments, return result as message."""
         try:
             tool_name = tool_call.function.name
-            tool_args = json.loads(tool_call.function.arguments)
+            args_raw = tool_call.function.arguments
+
+            try:
+                tool_args = json.loads(args_raw)
+            except json.JSONDecodeError:
+                return self._tool_error(tool_call, "Invalid JSON arguments.")
 
-            if tool_name in self.tool_mapping:
-                func = self.tool_mapping[tool_name]
-                result = func(**tool_args)
-            else:
-                # print(f"❌ Function {tool_name} not found")
-                result = {"error": f"Unknown tool: {tool_name}"}
+            func = self.tool_mapping.get(tool_name)
+            if not func:
+                return self._tool_error(tool_call, f"Unknown tool: {tool_name}")
 
+            result = func(**tool_args)
             return {
                 "role": "tool",
                 "tool_call_id": tool_call.id,
@@ -102,46 +127,60 @@ def handle_tool_call(self, tool_call):
             }
 
         except Exception as e:
-            # print(f"❌ Error executing tool {tool_name}: {str(e)}")
-            return {
-                "role": "tool",
-                "tool_call_id": tool_call.id,
-                "name": tool_name,
-                "content": json.dumps({"error": f"Tool execution failed: {str(e)}"}),
-            }
+            return self._tool_error(tool_call, f"Tool execution failed: {str(e)}")
+
+    def _tool_error(self, tool_call, error_msg: str):
+        """Helper for returning tool execution errors."""
+        return {
+            "role": "tool",
+            "tool_call_id": tool_call.id,
+            "name": tool_call.function.name,
+            "content": json.dumps({"error": error_msg}),
+        }
 
     def call_llm(
         self,
         messages,
-        max_retries: int = 5,
-        backoff_base: float = 1.0,
-        backoff_factor: float = 2.0,
-        max_backoff: float = 60.0,
+        max_retries: int = None,
+        backoff_base: float = None,
+        backoff_factor: float = None,
+        max_backoff: float = None,
     ):
         """
-        Call the OpenAI chat API, retrying on transient errors
-        with exponential backoff and jitter.
+        Call the OpenAI-compatible chat API with retries.
 
         Args:
-            messages: the message list to send
-            max_retries: how many total attempts (including first)
-            backoff_base: initial delay in seconds
-            backoff_factor: multiplier for exponential backoff
-            max_backoff: cap on backoff delay
+            messages: List of messages for the chat API.
+            max_retries: Total number of attempts, including the first call.
+            backoff_base: Initial delay in seconds before the first retry.
+            backoff_factor: Multiplier applied to the delay on each retry.
+            max_backoff: Upper bound in seconds on any single retry delay.
+
+        Returns:
+            The response from the chat API.
+
+        Raises:
+            openai.Timeout: If the API call times out.
+            openai.APIError: If the API call fails due to an API error.
+            openai.OpenAIError: If the API call fails after retries.
+            openai.APIStatusError: If the API call fails due to an API status error.
+            openai.RateLimitError: If the API call exceeds the rate limit.
+            openai.APITimeoutError: If the API call times out.
+            openai.APIConnectionError: If the API call fails due to a connection error.
         """
-        # Notes:
-        #  - https://medium.com/@Doug-Creates/nightmares-and-client-chat-completions-create-29ad0acbe16a
-        for attempt in range(1, max_retries + 1):
+        retries = max_retries or self.MAX_RETRIES
+        base = backoff_base or self.BACKOFF_BASE
+        factor = backoff_factor or self.BACKOFF_FACTOR
+        cap = max_backoff or self.MAX_BACKOFF
+
+        for attempt in range(1, retries + 1):
             try:
                 return self.session.chat.completions.create(
                     model=self.model,
                     messages=messages,
                     tools=self.tools,
                     tool_choice="auto",
                     temperature=self.temperature,
-                    # top_p=self.top_p,
-                    # frequency_penalty=self.frequency_penalty,
-                    # presence_penalty=self.presence_penalty,
                 )
             except (
                 openai.Timeout,
@@ -152,21 +191,17 @@ def call_llm(
                 openai.APITimeoutError,
                 openai.APIConnectionError,
             ) as e:
-                # If we've used up our retries, re‐raise
-                if attempt == max_retries:
+                if attempt == retries:
                     print(f"❌ LLM call failed after {attempt} attempts: {e}")
                     raise
-                # Otherwise, back off and retry
-                delay = min(
-                    backoff_base * (backoff_factor ** (attempt - 1)) + random.random(),
-                    max_backoff,
-                )
+
+                delay = min(base * (factor ** (attempt - 1)) + random.random(), cap)
                 print(
-                    f"⚠️ Attempt {attempt} failed with {type(e).__name__}: {e}. "
+                    f"⚠️  Attempt {attempt} failed: {type(e).__name__}: {e}. "
                     f"Retrying in {delay:.1f}s..."
                 )
                 time.sleep(delay)
+
             except Exception as e:
-                # Unexpected exception - do not retry
-                print(f"❌ Unexpected error in LLM call: {e}")
+                print(f"❌ Unexpected error during LLM call: {e}")
                 raise