Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
da1978a
PLATFORM env variable appears to have been missing from constant.py. …
pmilford Aug 3, 2025
784267c
Make default no proxy for web accesses
pmilford Aug 3, 2025
4c225cc
Correct the import for terminal_tools
pmilford Aug 3, 2025
59d11db
correct planning_tools import
pmilford Aug 3, 2025
2eeb821
Changed logic on wait_for_container_ready, it was always failing.
pmilford Aug 3, 2025
43d8868
Fix(docker): Make container port handling robust
google-labs-jules[bot] Aug 4, 2025
774bcc5
Merge pull request #1 from pmilford/fix/docker-port-allocation
pmilford Aug 4, 2025
fca38d7
Fix(docker): Implement robust port and container lifecycle handling
google-labs-jules[bot] Aug 4, 2025
1c61584
Merge pull request #2 from pmilford/fix/docker-port-allocation
pmilford Aug 4, 2025
37606bd
Fix(docker): Implement final robust container lifecycle logic
google-labs-jules[bot] Aug 4, 2025
3f63e8f
Merge pull request #3 from pmilford/fix/docker-port-allocation
pmilford Aug 4, 2025
fcbc47a
Fix(docker): Final robust container lifecycle and port allocation
google-labs-jules[bot] Aug 4, 2025
fb7597a
Merge pull request #4 from pmilford/fix/docker-port-allocation
pmilford Aug 4, 2025
59f1716
Fixes to code to permit : in model names, such as xxx:free, replaces …
pmilford Aug 4, 2025
546d140
Merge branch 'main' of https://github.com/pmilford/AI-Researcher
pmilford Aug 4, 2025
37c38c6
fix: Add timeout to litellm calls
google-labs-jules[bot] Aug 4, 2025
ea7d4a6
Merge pull request #5 from pmilford/fix-timeout-issue
pmilford Aug 4, 2025
b8b5c65
I used a longer retry delay for rate limit errors.
google-labs-jules[bot] Aug 6, 2025
b459fa5
Merge pull request #6 from pmilford/longer-retry-delay
pmilford Aug 6, 2025
0ebab11
Fix ImportError for wait_base
google-labs-jules[bot] Aug 6, 2025
fe20f63
Merge pull request #7 from pmilford/longer-retry-delay
pmilford Aug 6, 2025
e020fd7
Add logging for JSON parsing errors in `extract_json_from_output`.
google-labs-jules[bot] Aug 6, 2025
945178e
Merge pull request #8 from pmilford/feature/add-json-error-logging
pmilford Aug 6, 2025
9b7bdab
Fix JSON parsing error and improve agent prompts
google-labs-jules[bot] Aug 7, 2025
146bce9
Merge pull request #9 from pmilford/fix-json-parsing-error
pmilford Aug 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions research_agent/constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ def str_to_bool(value):
return False
return True # default return True

# platform of the research agent
PLATFORM = os.getenv('PLATFORM', "linux/amd64")

DOCKER_WORKPLACE_NAME = os.getenv('DOCKER_WORKPLACE_NAME', 'workplace_meta')
GITHUB_AI_TOKEN = os.getenv('GITHUB_AI_TOKEN', None)
Expand Down
2 changes: 1 addition & 1 deletion research_agent/inno/agents/inno_agent/judge_agent.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from research_agent.inno.types import Agent
from research_agent.inno.tools import gen_code_tree_structure, read_file, terminal_page_down, terminal_page_up, terminal_page_to
from research_agent.inno.tools.terminal_tools import gen_code_tree_structure, read_file, terminal_page_down, terminal_page_up, terminal_page_to
from research_agent.inno.tools.inno_tools.code_search import search_github_repos
from research_agent.inno.tools.inno_tools.web_tools import with_env as with_env_web
from research_agent.inno.util import make_message, make_tool_message
Expand Down
2 changes: 1 addition & 1 deletion research_agent/inno/agents/inno_agent/ml_agent.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from research_agent.inno.types import Agent
from research_agent.inno.tools import (
from research_agent.inno.tools.terminal_tools import (
gen_code_tree_structure, execute_command, read_file, create_file, write_file, list_files, create_directory, run_python, terminal_page_down, terminal_page_up, terminal_page_to
)
from research_agent.inno.util import make_message, make_tool_message
Expand Down
3 changes: 2 additions & 1 deletion research_agent/inno/agents/inno_agent/plan_agent.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from research_agent.inno.types import Agent
from research_agent.inno.tools import gen_code_tree_structure, read_file, plan_dataset, plan_model, plan_training, plan_testing, terminal_page_down, terminal_page_up, terminal_page_to
from research_agent.inno.tools.terminal_tools import gen_code_tree_structure, read_file, terminal_page_down, terminal_page_up, terminal_page_to
from research_agent.inno.util import make_message, make_tool_message
from research_agent.inno.tools.inno_tools.planning_tools import plan_dataset, plan_training, plan_testing
from research_agent.inno.registry import register_agent
from research_agent.inno.environment.docker_env import DockerEnv, with_env
from inspect import signature
Expand Down
2 changes: 1 addition & 1 deletion research_agent/inno/agents/inno_agent/survey_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def instructions(context_variables):
- Formal definitions
- Mathematical formulas
- Key theoretical components
4. Document your findings and transfer your findings to the `Code Survey Agent` using the `transfer_to_code_survey_agent` function. Make sure you have read these papers thoroughly.
4. Document your findings and transfer your findings to the `Code Survey Agent` using the `transfer_to_code_survey_agent` function. Make sure you have read these papers thoroughly. YOU MUST USE THE `transfer_to_code_survey_agent` function. DO NOT use any other `transfer_...` function.

REQUIREMENTS:
- Be thorough in your analysis
Expand Down
28 changes: 26 additions & 2 deletions research_agent/inno/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
retry_if_exception_type,
RetryCallState
)
from tenacity.wait import wait_base
from openai import AsyncOpenAI
from research_agent.constant import API_BASE_URL, NOT_SUPPORT_SENDER, MUST_ADD_USER, NOT_SUPPORT_FN_CALL, NOT_USE_FN_CALL
from research_agent.inno.fn_call_converter import convert_tools_to_description, convert_non_fncall_messages_to_fncall_messages, SYSTEM_PROMPT_SUFFIX_TEMPLATE, convert_fn_messages_to_non_fn_messages, interleave_user_into_messages
Expand Down Expand Up @@ -93,6 +94,27 @@ def truncate_message(message: str) -> str:
else:
return message

class my_wait_strategy(wait_base):
    """Tenacity wait strategy that backs off longer on rate-limit errors.

    Rate-limit errors (``litellm.RateLimitError``) use an exponential wait
    capped at 1200 seconds; every other retried exception uses a shorter
    exponential wait capped at 60 seconds.
    """
    def __init__(self):
        # Short backoff for transient errors: exponential, 1s minimum, 60s cap.
        self.short_wait = wait_exponential(multiplier=2, min=1, max=60)
        # Long backoff for rate limits: exponential, 30s minimum, 1200s (20 min) cap.
        self.long_wait = wait_exponential(multiplier=2, min=30, max=1200)

    def __call__(self, retry_state):
        """Return the number of seconds to wait before the next retry attempt."""
        exception = retry_state.outcome.exception()
        if isinstance(exception, litellm.RateLimitError):
            return self.long_wait(retry_state)
        else:
            return self.short_wait(retry_state)

def before_sleep_fn(retry_state):
    """A custom function to be called before sleeping in a retry loop.

    Logs a message distinguishing rate-limit errors from other failures,
    including the computed sleep time and (for generic errors) the attempt
    number.
    """
    # Exception that triggered this retry attempt.
    exception = retry_state.outcome.exception()
    # Seconds tenacity has already decided to sleep before the next attempt.
    wait_time = retry_state.next_action.sleep
    if isinstance(exception, litellm.RateLimitError):
        print(f"Rate limit exceeded. Retrying in {wait_time:.2f} seconds...")
    else:
        print(f"An error occurred: {exception}. Retrying in {wait_time:.2f} seconds... (attempt {retry_state.attempt_number})")

class MetaChain:
def __init__(self, log_path: Union[str, None, MetaChainLogger] = None):
"""
Expand Down Expand Up @@ -337,9 +359,9 @@ def run(
)
@retry(
stop=stop_after_attempt(6),
wait=wait_exponential(multiplier=2, min=30, max=1200),
wait=my_wait_strategy(),
retry=should_retry_error,
before_sleep=lambda retry_state: print(f"Retrying... (attempt {retry_state.attempt_number})")
before_sleep=before_sleep_fn
)
async def get_chat_completion_async(
self,
Expand Down Expand Up @@ -381,6 +403,7 @@ async def get_chat_completion_async(
"tool_choice": agent.tool_choice,
"stream": stream,
"base_url": API_BASE_URL,
"timeout": 600,
}
NO_SENDER_MODE = False
for not_sender_model in NOT_SUPPORT_SENDER:
Expand Down Expand Up @@ -425,6 +448,7 @@ async def get_chat_completion_async(
"messages": messages,
"stream": stream,
"base_url": API_BASE_URL,
"timeout": 600,
}
completion_response = await acompletion(**create_params)
last_message = [{"role": "assistant", "content": completion_response.choices[0].message.content}]
Expand Down
107 changes: 80 additions & 27 deletions research_agent/inno/environment/docker_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from typing import Optional, Union, Dict
from functools import update_wrapper
from inspect import signature
import socket
@dataclass
class DockerConfig:
container_name: str
Expand All @@ -37,6 +38,16 @@ def __init__(self, config: Union[DockerConfig, Dict]):
self.git_clone = config.git_clone
self.setup_package = config.setup_package
self.communication_port = config.communication_port

    @staticmethod
    def is_port_available(port: int) -> bool:
        """Check if a port is available for binding.

        Attempts to bind a TCP socket to the port on the loopback interface;
        a successful bind means no other process currently holds it. The
        socket is closed on exit of the ``with`` block, releasing the port
        again (a race with other processes between this check and actual use
        is possible — this is a best-effort pre-flight check).
        """
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.bind(("127.0.0.1", port))
                return True
            except socket.error:
                return False

def init_container(self):
container_check_command = ["docker", "ps", "-a", "--filter", f"name={self.container_name}", "--format", "{{.Names}}"]
Expand Down Expand Up @@ -70,35 +81,73 @@ def init_container(self):
print(f"Successfully created and switched to new branch: {new_branch_name}")

if existing_container.stdout.strip() == self.container_name:
# check if the container is running
running_check_command = ["docker", "ps", "--filter", f"name={self.container_name}", "--format", "{{.Names}}"]
running_container = subprocess.run(running_check_command, capture_output=True, text=True)
port_info = check_container_ports(self.container_name)

if running_container.stdout.strip() == self.container_name:
print(f"Container '{self.container_name}' is already running. Skipping creation.")
return # container is already running, skip creation
else:
# container exists but is not running, start it
start_command = ["docker", "start", self.container_name]
subprocess.run(start_command)
print(f"Container '{self.container_name}' has been started.")
if port_info:
# This is a valid, port-mapped container. Check if we can use it.
self.communication_port = port_info[0]
if not DockerEnv.is_port_available(self.communication_port):
raise Exception(f"Error: Container '{self.container_name}' needs port {self.communication_port}, but it's already in use by another process. Please free the port and try again.")

running_check_command = ["docker", "ps", "--filter", f"name={self.container_name}", "--format", "{{.Names}}"]
running_container = subprocess.run(running_check_command, capture_output=True, text=True)

if running_container.stdout.strip() != self.container_name:
print(f"Attempting to start existing container '{self.container_name}' on port {self.communication_port}...")
try:
start_command = ["docker", "start", self.container_name]
subprocess.run(start_command, check=True, capture_output=True, text=True)
print(f"Container '{self.container_name}' has been started successfully.")
except subprocess.CalledProcessError as e:
raise Exception(f"Failed to start container '{self.container_name}'. Docker error: {e.stderr}")
else:
print(f"Container '{self.container_name}' is already running on port {self.communication_port}.")
return
else:
# This is a zombie container (created but not started, no port). Remove it.
print(f"Warning: Found existing container '{self.container_name}' but it has no port mapping. It is likely a zombie from a failed startup. Removing it.")
try:
rm_command = ["docker", "rm", "-f", self.container_name]
subprocess.run(rm_command, check=True, capture_output=True, text=True)
print(f"Removed zombie container '{self.container_name}'.")
except subprocess.CalledProcessError as e:
raise Exception(f"Failed to remove zombie container '{self.container_name}'. Docker error: {e.stderr}")

# if the container does not exist, create and start a new container
# If we are here, either the container did not exist or we just removed a zombie.
# Create a new container and let Docker assign a random available port.
gpu_cmd = ["--gpus", GPUS] if GPUS else []
docker_command = [
"docker", "run", "-d", "--platform", PLATFORM, "--userns=host",] + gpu_cmd + ["--name", self.container_name,
"docker", "run", "-d", "--platform", PLATFORM, "--userns=host",] + gpu_cmd + ["--name", self.container_name,
"--user", "root", "-v", f"{self.local_workplace}:{self.docker_workplace}",
"-w", f"{self.docker_workplace}", "-p", f"{self.communication_port}:8000",
"-w", f"{self.docker_workplace}", "-p", "8000", # Let Docker assign a random host port
"--restart", "unless-stopped", BASE_IMAGES
]
print(f"Creating new container '{self.container_name}'...")
print(docker_command)
# execute the docker command
result = subprocess.run(docker_command, capture_output=True, text=True)
if result.returncode != 0:
raise Exception(f"Failed to start container: {result.stderr}")
if self.wait_for_container_ready(timeout=60):
print(f"Container '{self.container_name}' has been created and started.")

try:
subprocess.run(docker_command, check=True, capture_output=True, text=True)

# Discover the port that Docker assigned
port_info = check_container_ports(self.container_name)
if not port_info:
raise Exception("Failed to discover port for newly created container.")
self.communication_port = port_info[0]
print(f"Container created successfully. Docker assigned port {self.communication_port}.")

if self.wait_for_container_ready(timeout=60):
print(f"Container '{self.container_name}' is ready.")
except subprocess.CalledProcessError as e:
raise Exception(f"Failed to create container. Docker error: {e.stderr}")

    @staticmethod
    def is_port_open(host: str, port: int, timeout: float = 2):
        """Return True if a TCP connection to (host, port) succeeds within *timeout* seconds.

        Any failure (refused connection, timeout, DNS error, etc.) is treated
        as "not open" and returns False rather than raising.
        """
        try:
            with socket.create_connection((host, port), timeout=timeout):
                return True
        except Exception:
            return False

def wait_for_container_ready(self, timeout=30):
"""using subprocess to check if the container is running"""
start_time = time.time()
Expand All @@ -116,13 +165,17 @@ def wait_for_container_ready(self, timeout=30):
# 额外检查 tcp_server 是否运行
try:
port_info = check_container_ports(self.container_name)
assert port_info and (port_info[0] == port_info[1])
available_port = port_info[0]
self.communication_port = available_port
result = self.run_command('ps aux')
print("result", result)
if "tcp_server.py" in result['result']:
return True
if port_info:
host_port, container_port = port_info
self.communication_port = host_port

if DockerEnv.is_port_open("localhost", host_port):
result = self.run_command('ps aux')
print("result", result)
if "tcp_server.py" in result['result']:
return True
else:
print(f"Port {host_port} not yet open")
except Exception as e:
print(f"Failed to check container ports: {e}")

Expand Down
4 changes: 2 additions & 2 deletions research_agent/inno/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,10 @@ def _print_assistant_message(self, message, timestamp: str):
self.console.print(self._wrap_title("Assistant Message", "bold light_salmon3"))
self.console.print(f"{self._wrap_timestamp(timestamp, color=True)}\n[bold blue]{message['sender']}[/bold blue]:", end=" ")
if message["content"]: self.console.print(escape(message["content"]), highlight=True, emoji=True)
else: self.console.print(None, highlight=True, emoji=True)
else: self.console.print()
def _save_assistant_message(self, message, timestamp: str):
self._write_log(self._wrap_title("Assistant Message"))
content = message["content"] if message["content"] else None
content = message["content"] if message["content"] else ""
self._write_log(f"{self._wrap_timestamp(timestamp, color=False)}\n{message['sender']}: {content}")
def _print_tool_call(self, tool_calls: List, timestamp: str):
if len(tool_calls) >= 1: self.console.print(self._wrap_title("Tool Calls", "bold light_pink1"))
Expand Down
10 changes: 6 additions & 4 deletions research_agent/run_infer_idea.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import argparse
import os
from typing import List, Dict, Any, Union
import logging
from research_agent.inno.logger import MetaChainLogger
import importlib
from research_agent.inno.environment.utils import setup_dataset
Expand Down Expand Up @@ -54,7 +55,8 @@ def find_json_boundaries(text):
try:
return json.loads(json_str)
except json.JSONDecodeError as e:
print(f"JSON解析错误: {e}")
logging.error(f"JSON解析错误: {e}")
logging.error(f"错误的JSON字符串: {json_str}")
return {}
return {}
def get_args():
Expand Down Expand Up @@ -535,8 +537,8 @@ def main(args, references):
with open(args.instance_path, "r", encoding="utf-8") as f:
eval_instance = json.load(f)
instance_id = eval_instance["instance_id"] + "_idea"
local_root = os.path.join(os.getcwd(),"workplace_paper" , f"task_{instance_id}" + "_" + COMPLETION_MODEL.replace("/", "__"), args.workplace_name)
container_name = args.container_name + "_" + instance_id + "_" + COMPLETION_MODEL.replace("/", "__")
local_root = os.path.join(os.getcwd(),"workplace_paper" , f"task_{instance_id}" + "_" + COMPLETION_MODEL.replace("/", "__").replace(":", "_"), args.workplace_name)
container_name = args.container_name + "_" + instance_id + "_" + COMPLETION_MODEL.replace("/", "__").replace(":", "_")
os.makedirs(local_root, exist_ok=True)
env_config = DockerConfig(container_name = container_name,
workplace_name = args.workplace_name,
Expand All @@ -549,7 +551,7 @@ def main(args, references):
setup_dataset(args.category, code_env.local_workplace)
web_env = BrowserEnv(browsergym_eval_env = None, local_root=env_config.local_root, workplace_name=env_config.workplace_name)
file_env = RequestsMarkdownBrowser(viewport_size=1024 * 4, local_root=env_config.local_root, workplace_name=env_config.workplace_name, downloads_folder=os.path.join(env_config.local_root, env_config.workplace_name, "downloads"))
flow = InnoFlow(cache_path="cache_" + instance_id + "_" + COMPLETION_MODEL.replace("/", "__"), log_path="log_" + instance_id, code_env=code_env, web_env=web_env, file_env=file_env, model=args.model)
flow = InnoFlow(cache_path="cache_" + instance_id + "_" + COMPLETION_MODEL.replace("/", "__").replace(":", "_"), log_path="log_" + instance_id, code_env=code_env, web_env=web_env, file_env=file_env, model=args.model)
# ml_result = await flow(instance_path=instance_path)
asyncio.run(flow(instance_path=args.instance_path, task_level=args.task_level, local_root=local_root, workplace_name=args.workplace_name, max_iter_times=args.max_iter_times, category=args.category, references = references))
# print(judge_result)
Expand Down
Loading