diff --git a/ais_bench/benchmark/models/api_models/base_api.py b/ais_bench/benchmark/models/api_models/base_api.py
index d6681b87..171ffe84 100644
--- a/ais_bench/benchmark/models/api_models/base_api.py
+++ b/ais_bench/benchmark/models/api_models/base_api.py
@@ -3,6 +3,7 @@
 import warnings
 import asyncio
 import os.path as osp
+import ipaddress
 from abc import abstractmethod
 from copy import deepcopy
 from typing import Dict, List, Optional, Tuple, Union
@@ -113,7 +114,17 @@ def _get_base_url(self) -> str:
             if self.url.startswith("http://") or self.url.startswith("https://"):
                 return self.url
             return f"{protocol}://{self.url}"
-        base_url = f"{protocol}://{self.host_ip}:{self.host_port}/"
+
+        # RFC 3986 requires IPv6 literals to be bracketed in a URL so their colons are not read as the port separator.
+        host = self.host_ip
+        try:
+            ip = ipaddress.ip_address(host)
+            if isinstance(ip, ipaddress.IPv6Address):
+                host = f"[{ip}]"
+        except ValueError:
+            # Not a bare IP literal (e.g. a hostname or an already-bracketed IPv6 address); use it as is.
+            pass
+        base_url = f"{protocol}://{host}:{self.host_port}/"
         return base_url
 
     def _get_service_model_path(self) -> str:
diff --git a/docs/source_en/base_tutorials/all_params/models.md b/docs/source_en/base_tutorials/all_params/models.md
index 1392fb13..f59b7ee8 100644
--- a/docs/source_en/base_tutorials/all_params/models.md
+++ b/docs/source_en/base_tutorials/all_params/models.md
@@ -77,7 +77,7 @@ The description of configurable parameters for the service-oriented inference ba
 | `traffic_cfg` | Dict | Parameters for controlling fluctuations in the request sending rate (for detailed usage instructions, refer to 🔗 [Description of Request Rate (RPS) Distribution Control and Visualization](../../advanced_tutorials/rps_distribution.md)). If this item is not filled in, the function is disabled by default |
 | `retry` | Int | Maximum number of retries after failing to connect to the server. Valid range: [0, 1000] |
 | `api_key` | String | Custom API key, default is an empty string. Only supports the `VLLMCustomAPI` and `VLLMCustomAPIChat` model type. |
-| `host_ip` | String | Server IP address, supporting valid IPv4 or IPv6, e.g., `127.0.0.1` |
+| `host_ip` | String | Server IP address, supporting valid IPv4 or IPv6, e.g., `127.0.0.1`, `::1`. When using an IPv6 literal, the tool automatically wraps it in brackets when building URLs, for example: `http://[::1]:8080/` |
 | `host_port` | Int | Server port number, which must be consistent with the port specified during service-oriented deployment |
 | `url` | String | Custom URL path for accessing the inference service (needs to be configured when the base URL is not a combination of http://host_ip:host_port).For example, when `models`'s `type` is `VLLMCustomAPI`, configure `url` as `https://xxxxxxx/yyyy/`, the actual request URL accessed is `https://xxxxxxx/yyyy/v1/completions` |
 | `max_out_len` | Int | Maximum output length of the inference response; the actual length may be limited by the server. Valid range: (0, 131072] |
@@ -94,6 +94,7 @@ The description of configurable parameters for the service-oriented inference ba
 - When the dataset has timestamps and **use_timestamp** is True in the model config, requests are scheduled by timestamp and **request_rate** and **traffic_cfg** are ignored.
 - Setting `batch_size` too large may result in high CPU usage. Please configure it reasonably based on hardware conditions.
 - The default service address used by the service-oriented inference evaluation API is `localhost:8080`. In actual use, you need to modify it to the IP and port of the service-oriented backend according to the actual deployment.
+- When using an IPv6 literal (such as `::1` or `2001:db8::1`) as `host_ip`, the tool will automatically wrap it in brackets in the generated URL (for example, `http://[2001:db8::1]:8080/`), so you do not need to manually add brackets in the configuration.
 
 
 ## Local Model Backend
diff --git a/docs/source_zh_cn/base_tutorials/all_params/models.md b/docs/source_zh_cn/base_tutorials/all_params/models.md
index 1ee3a41b..7ccc7845 100644
--- a/docs/source_zh_cn/base_tutorials/all_params/models.md
+++ b/docs/source_zh_cn/base_tutorials/all_params/models.md
@@ -72,7 +72,7 @@ models = [
 | `traffic_cfg` | Dict | 请求发送速率波动控制参数（具体使用说明请参考 🔗 [请求速率(RPS)分布控制及可视化说明](../../advanced_tutorials/rps_distribution.md)），不填写此项默认不启用该功能。 |
 | `retry` | Int | 连接服务端失败后的最大重试次数。合法范围：[0, 1000] |
 | `api_key` | String | 自定义API key，默认是空字符串。仅支持 `VLLMCustomAPI` 和 `VLLMCustomAPIChat` 模型类型。 |
-| `host_ip` | String | 服务端 IP 地址，支持合法 IPv4 或 IPv6，例如：`127.0.0.1` |
+| `host_ip` | String | 服务端 IP 地址，支持合法 IPv4 或 IPv6，例如：`127.0.0.1`、`::1`。当使用 IPv6 字面量时，访问 URL 中会自动转换为带方括号的形式，例如：`http://[::1]:8080/` |
 | `host_port` | Int | 服务端端口号，应与服务化部署指定的端口一致 |
 | `url` | String | 自定义访问推理服务的URL路径(当base url不是http/https://host_ip:host_port的组合时需要配置，配置后host_ip和host_port将被忽略) ，例如当`models`的`type`为`VLLMCustomAPI`时，配置`url`为`https://xxxxxxx/yyyy/`，实际请求访问的URL为`https://xxxxxxx/yyyy/v1/completions`|
 | `max_out_len` | Int | 推理响应的最大输出长度，实际长度可能受服务端限制。合法范围：(0, 131072] |
@@ -88,6 +88,7 @@ models = [
 - 当数据集含 timestamp 且模型配置中 **use_timestamp** 为 True 时，请求按 timestamp 发送，**request_rate** 与 **traffic_cfg** 将被忽略。
 - `batch_size` 设置过大可能导致 CPU 占用过高，请根据硬件条件合理配置。
 - 服务化推理评测 API 默认使用的服务地址为 `localhost:8080`。实际使用时需根据实际部署修改为服务化后端的 IP 和端口。
+- 当使用 IPv6 字面量（如 `::1`、`2001:db8::1`）作为 `host_ip` 时，工具会在生成的访问 URL 中自动为其添加方括号（例如 `http://[2001:db8::1]:8080/`），无需在配置中手动编写方括号。
 
 ## 本地模型后端
 |模型配置名称|简介|使用前提|支持的prompt格式(字符串格式或对话格式)|对应源码配置文件路径|
diff --git a/tests/UT/models/api_models/test_base_api.py b/tests/UT/models/api_models/test_base_api.py
index 931f3439..6b04a404 100644
--- a/tests/UT/models/api_models/test_base_api.py
+++ b/tests/UT/models/api_models/test_base_api.py
@@ -61,6 +61,8 @@ async def parse_stream_response(self, data, output):
             "host_ip": "127.0.0.1",
             "host_port": 8000
         }
+        self.ipv6_kwargs = self.default_kwargs.copy()
+        self.ipv6_kwargs["host_ip"] = "::1"
 
     def test_init(self):
         model = self.model_class(**self.default_kwargs)
@@ -70,6 +72,10 @@ def test_init(self):
         self.assertEqual(model.retry, 1)
         self.assertEqual(model.base_url, "http://127.0.0.1:8000/")
 
+    def test_init_with_ipv6_host_ip(self):
+        model = self.model_class(**self.ipv6_kwargs)
+        self.assertEqual(model.base_url, "http://[::1]:8000/")
+
     def test_init_with_url(self):
         kwargs = self.default_kwargs.copy()
         kwargs["url"] = "https://test-api.com/v1"
@@ -86,6 +92,16 @@ def test_get_base_url(self):
         model = self.model_class(**self.default_kwargs)
         self.assertEqual(model._get_base_url(), "http://127.0.0.1:8000/")
 
+    def test_get_base_url_with_ipv6_host_ip(self):
+        model = self.model_class(**self.ipv6_kwargs)
+        self.assertEqual(model._get_base_url(), "http://[::1]:8000/")
+
+    def test_get_base_url_with_hostname(self):
+        kwargs = self.default_kwargs.copy()
+        kwargs["host_ip"] = "localhost"
+        model = self.model_class(**kwargs)
+        self.assertEqual(model._get_base_url(), "http://localhost:8000/")
+
     @mock.patch('requests.get')
     def test_get_service_model_path_success(self, mock_get):
         mock_response = MockResponse(200, json_data={"data": [{"id": "test-model-123"}]})