diff --git a/ais_bench/benchmark/models/api_models/base_api.py b/ais_bench/benchmark/models/api_models/base_api.py index d6681b87..171ffe84 100644 --- a/ais_bench/benchmark/models/api_models/base_api.py +++ b/ais_bench/benchmark/models/api_models/base_api.py @@ -3,6 +3,7 @@ import warnings import asyncio import os.path as osp +import ipaddress from abc import abstractmethod from copy import deepcopy from typing import Dict, List, Optional, Tuple, Union @@ -113,7 +114,17 @@ def _get_base_url(self) -> str: if self.url.startswith("http://") or self.url.startswith("https://"): return self.url return f"{protocol}://{self.url}" - base_url = f"{protocol}://{self.host_ip}:{self.host_port}/" + + # For IPv6 literals, wrap in brackets when constructing the URL. + host = self.host_ip + try: + ip = ipaddress.ip_address(host) + if isinstance(ip, ipaddress.IPv6Address): + host = f"[{ip}]" + except ValueError: + # Not an IP address, so it's a hostname. Use it as is. + pass + base_url = f"{protocol}://{host}:{self.host_port}/" return base_url def _get_service_model_path(self) -> str: diff --git a/docs/source_en/base_tutorials/all_params/models.md b/docs/source_en/base_tutorials/all_params/models.md index 1392fb13..f59b7ee8 100644 --- a/docs/source_en/base_tutorials/all_params/models.md +++ b/docs/source_en/base_tutorials/all_params/models.md @@ -77,7 +77,7 @@ The description of configurable parameters for the service-oriented inference ba | `traffic_cfg` | Dict | Parameters for controlling fluctuations in the request sending rate (for detailed usage instructions, refer to 🔗 [Description of Request Rate (RPS) Distribution Control and Visualization](../../advanced_tutorials/rps_distribution.md)). If this item is not filled in, the function is disabled by default | | `retry` | Int | Maximum number of retries after failing to connect to the server. Valid range: [0, 1000] | | `api_key` | String | Custom API key, default is an empty string. Only supports the `VLLMCustomAPI` and `VLLMCustomAPIChat` model type. | -| `host_ip` | String | Server IP address, supporting valid IPv4 or IPv6, e.g., `127.0.0.1` | +| `host_ip` | String | Server IP address, supporting valid IPv4 or IPv6, e.g., `127.0.0.1`, `::1`. When using an IPv6 literal, the tool automatically wraps it in brackets when building URLs, for example: `http://[::1]:8080/` | | `host_port` | Int | Server port number, which must be consistent with the port specified during service-oriented deployment | | `url` | String | Custom URL path for accessing the inference service (needs to be configured when the base URL is not a combination of http://host_ip:host_port).For example, when `models`'s `type` is `VLLMCustomAPI`, configure `url` as `https://xxxxxxx/yyyy/`, the actual request URL accessed is `https://xxxxxxx/yyyy/v1/completions` | | `max_out_len` | Int | Maximum output length of the inference response; the actual length may be limited by the server. Valid range: (0, 131072] | @@ -94,6 +94,7 @@ The description of configurable parameters for the service-oriented inference ba - When the dataset has timestamps and **use_timestamp** is True in the model config, requests are scheduled by timestamp and **request_rate** and **traffic_cfg** are ignored. - Setting `batch_size` too large may result in high CPU usage. Please configure it reasonably based on hardware conditions. - The default service address used by the service-oriented inference evaluation API is `localhost:8080`. In actual use, you need to modify it to the IP and port of the service-oriented backend according to the actual deployment. +- When using an IPv6 literal (such as `::1` or `2001:db8::1`) as `host_ip`, the tool will automatically wrap it in brackets in the generated URL (for example, `http://[2001:db8::1]:8080/`), so you do not need to manually add brackets in the configuration. ## Local Model Backend diff --git a/docs/source_zh_cn/base_tutorials/all_params/models.md b/docs/source_zh_cn/base_tutorials/all_params/models.md index 1ee3a41b..7ccc7845 100644 --- a/docs/source_zh_cn/base_tutorials/all_params/models.md +++ b/docs/source_zh_cn/base_tutorials/all_params/models.md @@ -72,7 +72,7 @@ models = [ | `traffic_cfg` | Dict | 请求发送速率波动控制参数(具体使用说明请参考 🔗 [请求速率(RPS)分布控制及可视化说明](../../advanced_tutorials/rps_distribution.md)),不填写此项默认不启用该功能。 | | `retry` | Int | 连接服务端失败后的最大重试次数。合法范围:[0, 1000] | | `api_key` | String | 自定义API key,默认是空字符串。仅支持 `VLLMCustomAPI` 和 `VLLMCustomAPIChat` 模型类型。 | -| `host_ip` | String | 服务端 IP 地址,支持合法 IPv4 或 IPv6,例如:`127.0.0.1` | +| `host_ip` | String | 服务端 IP 地址,支持合法 IPv4 或 IPv6,例如:`127.0.0.1`、`::1`。当使用 IPv6 字面量时,访问 URL 中会自动转换为带方括号的形式,例如:`http://[::1]:8080/` | | `host_port` | Int | 服务端端口号,应与服务化部署指定的端口一致 | | `url` | String | 自定义访问推理服务的URL路径(当base url不是http/https://host_ip:host_port的组合时需要配置,配置后host_ip和host_port将被忽略) ,例如当`models`的`type`为`VLLMCustomAPI`时,配置`url`为`https://xxxxxxx/yyyy/`,实际请求访问的URL为`https://xxxxxxx/yyyy/v1/completions`| | `max_out_len` | Int | 推理响应的最大输出长度,实际长度可能受服务端限制。合法范围:(0, 131072] | @@ -88,6 +88,7 @@ models = [ - 当数据集含 timestamp 且模型配置中 **use_timestamp** 为 True 时,请求按 timestamp 发送,**request_rate** 与 **traffic_cfg** 将被忽略。 - `batch_size` 设置过大可能导致 CPU 占用过高,请根据硬件条件合理配置。 - 服务化推理评测 API 默认使用的服务地址为 `localhost:8080`。实际使用时需根据实际部署修改为服务化后端的 IP 和端口。 +- 当使用 IPv6 字面量(如 `::1`、`2001:db8::1`)作为 `host_ip` 时,工具会在生成的访问 URL 中自动为其添加方括号(例如 `http://[2001:db8::1]:8080/`),无需在配置中手动编写方括号。 ## 本地模型后端 |模型配置名称|简介|使用前提|支持的prompt格式(字符串格式或对话格式)|对应源码配置文件路径| diff --git a/tests/UT/models/api_models/test_base_api.py b/tests/UT/models/api_models/test_base_api.py index 931f3439..6b04a404 100644 --- a/tests/UT/models/api_models/test_base_api.py +++ b/tests/UT/models/api_models/test_base_api.py @@ -61,6 +61,8 @@ async def parse_stream_response(self, data, output): "host_ip": "127.0.0.1", "host_port": 8000 } + self.ipv6_kwargs = self.default_kwargs.copy() + self.ipv6_kwargs["host_ip"] = "::1" def test_init(self): model = self.model_class(**self.default_kwargs) @@ -70,6 +72,10 @@ def test_init(self): self.assertEqual(model.retry, 1) self.assertEqual(model.base_url, "http://127.0.0.1:8000/") + def test_init_with_ipv6_host_ip(self): + model = self.model_class(**self.ipv6_kwargs) + self.assertEqual(model.base_url, "http://[::1]:8000/") + def test_init_with_url(self): kwargs = self.default_kwargs.copy() kwargs["url"] = "https://test-api.com/v1" @@ -86,6 +92,16 @@ def test_get_base_url(self): model = self.model_class(**self.default_kwargs) self.assertEqual(model._get_base_url(), "http://127.0.0.1:8000/") + def test_get_base_url_with_ipv6_host_ip(self): + model = self.model_class(**self.ipv6_kwargs) + self.assertEqual(model._get_base_url(), "http://[::1]:8000/") + + def test_get_base_url_with_hostname(self): + kwargs = self.default_kwargs.copy() + kwargs["host_ip"] = "localhost" + model = self.model_class(**kwargs) + self.assertEqual(model._get_base_url(), "http://localhost:8000/") + @mock.patch('requests.get') def test_get_service_model_path_success(self, mock_get): mock_response = MockResponse(200, json_data={"data": [{"id": "test-model-123"}]})