diff --git a/app/api/v1/image.py b/app/api/v1/image.py
index 3c5a6c3..c53b268 100644
--- a/app/api/v1/image.py
+++ b/app/api/v1/image.py
@@ -36,6 +36,7 @@
from app.services.grok.processor import ImageCollectProcessor, ImageStreamProcessor
from app.services.quota import enforce_daily_quota
from app.services.request_stats import request_stats
+from app.services.token_usage import build_image_usage
from app.services.token import get_token_manager
@@ -510,6 +511,23 @@ async def _record_request(model_id: str, success: bool):
pass
+async def _record_request_with_usage(model_id: str, success: bool, prompt: str, success_count: int = 1):
+ try:
+ usage = build_image_usage(prompt, success_count=success_count)
+ raw = usage.get("_raw") or {}
+ await request_stats.record_request(
+ model_id,
+ success=success,
+ total_tokens=int(usage.get("total_tokens", 0) or 0),
+ input_tokens=int(usage.get("input_tokens", 0) or 0),
+ output_tokens=int(usage.get("output_tokens", 0) or 0),
+ reasoning_tokens=int(raw.get("reasoning_tokens", 0) or 0),
+ cached_tokens=int(raw.get("cached_tokens", 0) or 0),
+ )
+ except Exception:
+ pass
+
+
async def _get_token_for_model(model_id: str):
"""获取指定模型可用 token,失败时抛出统一异常"""
try:
@@ -659,7 +677,12 @@ async def _wrapped_experimental_stream():
consume_on_fail=True,
is_usage=True,
)
- await _record_request(model_info.model_id, True)
+ await _record_request_with_usage(
+ model_info.model_id,
+ True,
+ f"Image Generation: {request.prompt}",
+ success_count=n,
+ )
else:
await _record_request(model_info.model_id, False)
except Exception:
@@ -707,7 +730,12 @@ async def _wrapped_stream():
consume_on_fail=True,
is_usage=True,
)
- await _record_request(model_info.model_id, True)
+ await _record_request_with_usage(
+ model_info.model_id,
+ True,
+ f"Image Generation: {request.prompt}",
+ success_count=n,
+ )
else:
await _record_request(model_info.model_id, False)
except Exception:
@@ -766,7 +794,15 @@ async def _wrapped_stream():
consume_on_fail=True,
is_usage=True,
)
- await _record_request(model_info.model_id, bool(success))
+ if success:
+ await _record_request_with_usage(
+ model_info.model_id,
+ True,
+ f"Image Generation: {request.prompt}",
+ success_count=n,
+ )
+ else:
+ await _record_request(model_info.model_id, False)
except Exception:
pass
@@ -919,7 +955,12 @@ async def _wrapped_experimental_stream():
consume_on_fail=True,
is_usage=True,
)
- await _record_request(model_info.model_id, True)
+ await _record_request_with_usage(
+ model_info.model_id,
+ True,
+ f"Image Edit: {edit_request.prompt}",
+ success_count=n,
+ )
else:
await _record_request(model_info.model_id, False)
except Exception:
@@ -970,7 +1011,12 @@ async def _wrapped_stream():
consume_on_fail=True,
is_usage=True,
)
- await _record_request(model_info.model_id, True)
+ await _record_request_with_usage(
+ model_info.model_id,
+ True,
+ f"Image Edit: {edit_request.prompt}",
+ success_count=n,
+ )
else:
await _record_request(model_info.model_id, False)
except Exception:
@@ -1055,7 +1101,15 @@ async def _wrapped_stream():
consume_on_fail=True,
is_usage=True,
)
- await _record_request(model_info.model_id, bool(success))
+ if success:
+ await _record_request_with_usage(
+ model_info.model_id,
+ True,
+ f"Image Edit: {edit_request.prompt}",
+ success_count=n,
+ )
+ else:
+ await _record_request(model_info.model_id, False)
except Exception:
pass
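For illustration (not part of the patch): a minimal sketch of the usage dict that _record_request_with_usage consumes, assuming build_image_usage behaves as defined in app/services/token_usage.py later in this diff. The counts are heuristics, not tokenizer output.

    from app.services.token_usage import build_image_usage

    # "a cat" is 5 ASCII characters -> (5 + 3) // 4 = 2 estimated text tokens,
    # multiplied by success_count, so two successfully generated images give 4.
    usage = build_image_usage("a cat", success_count=2)
    assert usage["total_tokens"] == 4
    assert usage["input_tokens"] == 4 and usage["output_tokens"] == 0
    assert usage["_raw"] == {"reasoning_tokens": 0, "cached_tokens": 0, "input_tokens": 4, "output_tokens": 0}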
diff --git a/app/services/grok/chat.py b/app/services/grok/chat.py
index 747d9a6..e476ceb 100644
--- a/app/services/grok/chat.py
+++ b/app/services/grok/chat.py
@@ -532,7 +532,9 @@ async def completions(
# Process the response
if is_stream:
- processor = StreamProcessor(model_name, token, think).process(response)
+ stream_processor = StreamProcessor(model_name, token, think)
+ processor = stream_processor.process(response)
+ prompt_messages = [msg.model_dump() for msg in messages]
async def _wrapped_stream():
completed = False
@@ -544,8 +546,18 @@ async def _wrapped_stream():
# Only count as "success" when the stream ends naturally.
try:
if completed:
+ usage = stream_processor.build_usage(prompt_messages)
+ raw = usage.get("_raw") or {}
await token_mgr.sync_usage(token, model_name, consume_on_fail=True, is_usage=True)
- await request_stats.record_request(model_name, success=True)
+ await request_stats.record_request(
+ model_name,
+ success=True,
+ total_tokens=int(usage.get("total_tokens", 0) or 0),
+ input_tokens=int(usage.get("prompt_tokens", 0) or 0),
+ output_tokens=int(usage.get("completion_tokens", 0) or 0),
+ reasoning_tokens=int(raw.get("reasoning_tokens", 0) or 0),
+ cached_tokens=int(raw.get("cached_tokens", 0) or 0),
+ )
else:
await request_stats.record_request(model_name, success=False)
except Exception:
@@ -553,10 +565,23 @@ async def _wrapped_stream():
return _wrapped_stream()
- result = await CollectProcessor(model_name, token).process(response)
+ result = await CollectProcessor(model_name, token).process(
+ response,
+ prompt_messages=[msg.model_dump() for msg in messages],
+ )
try:
+ usage = result.get("usage") or {}
+ raw = usage.get("_raw") or {}
await token_mgr.sync_usage(token, model_name, consume_on_fail=True, is_usage=True)
- await request_stats.record_request(model_name, success=True)
+ await request_stats.record_request(
+ model_name,
+ success=True,
+ total_tokens=int(usage.get("total_tokens", 0) or 0),
+ input_tokens=int(usage.get("prompt_tokens", 0) or 0),
+ output_tokens=int(usage.get("completion_tokens", 0) or 0),
+ reasoning_tokens=int(raw.get("reasoning_tokens", 0) or 0),
+ cached_tokens=int(raw.get("cached_tokens", 0) or 0),
+ )
except Exception:
pass
return result
diff --git a/app/services/grok/processor.py b/app/services/grok/processor.py
index b50cb0a..3be986a 100644
--- a/app/services/grok/processor.py
+++ b/app/services/grok/processor.py
@@ -11,6 +11,7 @@
from app.core.config import get_config
from app.core.logger import logger
from app.services.grok.assets import DownloadService
+from app.services.token_usage import build_chat_usage
ASSET_URL = "https://assets.grok.com/"
@@ -116,6 +117,8 @@ def __init__(self, model: str, token: str = "", think: bool = None):
self.fingerprint: str = ""
self.think_opened: bool = False
self.role_sent: bool = False
+ self._output_text: str = ""
+ self._reasoning_text: str = ""
self.filter_tags = get_config("grok.filter_tags", [])
self.image_format = get_config("app.image_format", "url")
@@ -157,6 +160,7 @@ async def process(self, response: AsyncIterable[bytes]) -> AsyncGenerator[str, None]:
idx = img.get('imageIndex', 0) + 1
progress = img.get('progress', 0)
yield self._sse(f"正在生成第{idx}张图片中,当前进度{progress}%\n")
+ self._reasoning_text += f"正在生成第{idx}张图片中,当前进度{progress}%\n"
continue
# modelResponse
@@ -164,6 +168,7 @@ async def process(self, response: AsyncIterable[bytes]) -> AsyncGenerator[str, None]:
if self.think_opened and self.show_think:
if msg := mr.get("message"):
yield self._sse(msg + "\n")
+ self._reasoning_text += msg + "\n"
yield self._sse("\n")
self.think_opened = False
@@ -171,18 +176,21 @@ async def process(self, response: AsyncIterable[bytes]) -> AsyncGenerator[str, None]:
for url in mr.get("generatedImageUrls", []):
parts = url.split("/")
img_id = parts[-2] if len(parts) >= 2 else "image"
-
+
if self.image_format == "base64":
dl_service = self._get_dl()
base64_data = await dl_service.to_base64(url, self.token, "image")
if base64_data:
yield self._sse(f"\n")
+ self._output_text += f"\n"
else:
final_url = await self.process_url(url, "image")
yield self._sse(f"\n")
+ self._output_text += f"\n"
else:
final_url = await self.process_url(url, "image")
yield self._sse(f"\n")
+ self._output_text += f"\n"
if (meta := mr.get("metadata", {})).get("llm_info", {}).get("modelHash"):
self.fingerprint = meta["llm_info"]["modelHash"]
@@ -192,9 +200,14 @@ async def process(self, response: AsyncIterable[bytes]) -> AsyncGenerator[str, None]:
if (token := resp.get("token")) is not None:
if token and not (self.filter_tags and any(t in token for t in self.filter_tags)):
yield self._sse(token)
+ if self.think_opened and self.show_think:
+ self._reasoning_text += token
+ else:
+ self._output_text += token
if self.think_opened:
yield self._sse("\n")
+ self.think_opened = False
yield self._sse(finish="stop")
yield "data: [DONE]\n\n"
except Exception as e:
@@ -203,6 +216,10 @@ async def process(self, response: AsyncIterable[bytes]) -> AsyncGenerator[str, None]:
finally:
await self.close()
+ def build_usage(self, prompt_messages: Optional[list[dict]] = None) -> dict[str, Any]:
+ usage = build_chat_usage(prompt_messages or [], (self._output_text + self._reasoning_text))
+ return usage
+
class CollectProcessor(BaseProcessor):
"""非流式响应处理器"""
@@ -211,7 +228,7 @@ def __init__(self, model: str, token: str = ""):
super().__init__(model, token)
self.image_format = get_config("app.image_format", "url")
- async def process(self, response: AsyncIterable[bytes]) -> dict[str, Any]:
+ async def process(self, response: AsyncIterable[bytes], prompt_messages: Optional[list[dict]] = None) -> dict[str, Any]:
"""处理并收集完整响应"""
response_id = ""
fingerprint = ""
@@ -261,6 +278,7 @@ async def process(self, response: AsyncIterable[bytes]) -> dict[str, Any]:
finally:
await self.close()
+ usage = build_chat_usage(prompt_messages or [], content)
return {
"id": response_id,
"object": "chat.completion",
@@ -272,11 +290,7 @@ async def process(self, response: AsyncIterable[bytes]) -> dict[str, Any]:
"message": {"role": "assistant", "content": content, "refusal": None, "annotations": []},
"finish_reason": "stop"
}],
- "usage": {
- "prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0,
- "prompt_tokens_details": {"cached_tokens": 0, "text_tokens": 0, "audio_tokens": 0, "image_tokens": 0},
- "completion_tokens_details": {"text_tokens": 0, "audio_tokens": 0, "reasoning_tokens": 0}
- }
+ "usage": usage
}
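For illustration: what StreamProcessor.build_usage() boils down to once a stream has finished. The processor accumulates visible output text and reasoning text separately while streaming, then passes the concatenation plus the original messages to build_chat_usage. A minimal sketch with made-up texts:

    from app.services.token_usage import build_chat_usage

    messages = [{"role": "user", "content": "What is the answer?"}]
    output_text = "The answer is 42."        # accumulated from streamed tokens
    reasoning_text = "let me check first\n"  # accumulated while the think block was open
    usage = build_chat_usage(messages, output_text + reasoning_text)
    # With no <think> markers left in the concatenated text, everything is counted
    # as completion text; chat.py then feeds these numbers into request_stats.
    print(usage["prompt_tokens"], usage["completion_tokens"], usage["total_tokens"])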
diff --git a/app/services/request_logger.py b/app/services/request_logger.py
index 5272e42..839a1ad 100644
--- a/app/services/request_logger.py
+++ b/app/services/request_logger.py
@@ -21,6 +21,11 @@ class RequestLog:
status: int
key_name: str
token_suffix: str
+ total_tokens: int = 0
+ input_tokens: int = 0
+ output_tokens: int = 0
+ reasoning_tokens: int = 0
+ cached_tokens: int = 0
error: str = ""
class RequestLogger:
@@ -95,6 +100,11 @@ async def add_log(self,
status: int,
key_name: str,
token_suffix: str = "",
+ total_tokens: int = 0,
+ input_tokens: int = 0,
+ output_tokens: int = 0,
+ reasoning_tokens: int = 0,
+ cached_tokens: int = 0,
error: str = ""):
"""添加日志"""
if not self._loaded:
@@ -115,6 +125,11 @@ async def add_log(self,
"status": status,
"key_name": key_name,
"token_suffix": token_suffix,
+ "total_tokens": int(total_tokens or 0),
+ "input_tokens": int(input_tokens or 0),
+ "output_tokens": int(output_tokens or 0),
+ "reasoning_tokens": int(reasoning_tokens or 0),
+ "cached_tokens": int(cached_tokens or 0),
"error": error
}
diff --git a/app/services/request_stats.py b/app/services/request_stats.py
index 4718e48..f02429d 100644
--- a/app/services/request_stats.py
+++ b/app/services/request_stats.py
@@ -28,8 +28,26 @@ def __init__(self):
self.file_path = Path(__file__).parents[2] / "data" / "stats.json"
# Stats data
- self._hourly: Dict[str, Dict[str, int]] = defaultdict(lambda: {"total": 0, "success": 0, "failed": 0})
- self._daily: Dict[str, Dict[str, int]] = defaultdict(lambda: {"total": 0, "success": 0, "failed": 0})
+ self._hourly: Dict[str, Dict[str, int]] = defaultdict(lambda: {
+ "total": 0,
+ "success": 0,
+ "failed": 0,
+ "total_tokens": 0,
+ "input_tokens": 0,
+ "output_tokens": 0,
+ "reasoning_tokens": 0,
+ "cached_tokens": 0,
+ })
+ self._daily: Dict[str, Dict[str, int]] = defaultdict(lambda: {
+ "total": 0,
+ "success": 0,
+ "failed": 0,
+ "total_tokens": 0,
+ "input_tokens": 0,
+ "output_tokens": 0,
+ "reasoning_tokens": 0,
+ "cached_tokens": 0,
+ })
self._models: Dict[str, int] = defaultdict(int)
# Retention policy
@@ -61,10 +79,28 @@ async def _load_data(self):
data = orjson.loads(content)
# Restore defaultdict structure
- self._hourly = defaultdict(lambda: {"total": 0, "success": 0, "failed": 0})
+ self._hourly = defaultdict(lambda: {
+ "total": 0,
+ "success": 0,
+ "failed": 0,
+ "total_tokens": 0,
+ "input_tokens": 0,
+ "output_tokens": 0,
+ "reasoning_tokens": 0,
+ "cached_tokens": 0,
+ })
self._hourly.update(data.get("hourly", {}))
- self._daily = defaultdict(lambda: {"total": 0, "success": 0, "failed": 0})
+ self._daily = defaultdict(lambda: {
+ "total": 0,
+ "success": 0,
+ "failed": 0,
+ "total_tokens": 0,
+ "input_tokens": 0,
+ "output_tokens": 0,
+ "reasoning_tokens": 0,
+ "cached_tokens": 0,
+ })
self._daily.update(data.get("daily", {}))
self._models = defaultdict(int)
@@ -96,7 +132,16 @@ async def _save_data(self):
except Exception as e:
logger.error(f"[Stats] 保存数据失败: {e}")
- async def record_request(self, model: str, success: bool) -> None:
+ async def record_request(
+ self,
+ model: str,
+ success: bool,
+ total_tokens: int = 0,
+ input_tokens: int = 0,
+ output_tokens: int = 0,
+ reasoning_tokens: int = 0,
+ cached_tokens: int = 0,
+ ) -> None:
"""记录一次请求"""
if not self._loaded:
await self.init()
@@ -111,6 +156,11 @@ async def record_request(self, model: str, success: bool) -> None:
self._hourly[hour_key]["success"] += 1
else:
self._hourly[hour_key]["failed"] += 1
+ self._hourly[hour_key]["total_tokens"] += int(total_tokens or 0)
+ self._hourly[hour_key]["input_tokens"] += int(input_tokens or 0)
+ self._hourly[hour_key]["output_tokens"] += int(output_tokens or 0)
+ self._hourly[hour_key]["reasoning_tokens"] += int(reasoning_tokens or 0)
+ self._hourly[hour_key]["cached_tokens"] += int(cached_tokens or 0)
# Daily stats
self._daily[day_key]["total"] += 1
@@ -118,6 +168,11 @@ async def record_request(self, model: str, success: bool) -> None:
self._daily[day_key]["success"] += 1
else:
self._daily[day_key]["failed"] += 1
+ self._daily[day_key]["total_tokens"] += int(total_tokens or 0)
+ self._daily[day_key]["input_tokens"] += int(input_tokens or 0)
+ self._daily[day_key]["output_tokens"] += int(output_tokens or 0)
+ self._daily[day_key]["reasoning_tokens"] += int(reasoning_tokens or 0)
+ self._daily[day_key]["cached_tokens"] += int(cached_tokens or 0)
# Per-model stats
self._models[model] += 1
@@ -154,7 +209,16 @@ def get_stats(self, hours: int = 24, days: int = 7) -> Dict[str, Any]:
from datetime import timedelta
dt = now - timedelta(hours=i)
key = dt.strftime("%Y-%m-%dT%H")
- data = self._hourly.get(key, {"total": 0, "success": 0, "failed": 0})
+ data = self._hourly.get(key, {
+ "total": 0,
+ "success": 0,
+ "failed": 0,
+ "total_tokens": 0,
+ "input_tokens": 0,
+ "output_tokens": 0,
+ "reasoning_tokens": 0,
+ "cached_tokens": 0,
+ })
hourly_data.append({
"hour": dt.strftime("%H:00"),
"date": dt.strftime("%m-%d"),
@@ -167,7 +231,16 @@ def get_stats(self, hours: int = 24, days: int = 7) -> Dict[str, Any]:
from datetime import timedelta
dt = now - timedelta(days=i)
key = dt.strftime("%Y-%m-%d")
- data = self._daily.get(key, {"total": 0, "success": 0, "failed": 0})
+ data = self._daily.get(key, {
+ "total": 0,
+ "success": 0,
+ "failed": 0,
+ "total_tokens": 0,
+ "input_tokens": 0,
+ "output_tokens": 0,
+ "reasoning_tokens": 0,
+ "cached_tokens": 0,
+ })
daily_data.append({
"date": dt.strftime("%m-%d"),
**data
@@ -180,6 +253,11 @@ def get_stats(self, hours: int = 24, days: int = 7) -> Dict[str, Any]:
total_requests = sum(d["total"] for d in self._hourly.values())
total_success = sum(d["success"] for d in self._hourly.values())
total_failed = sum(d["failed"] for d in self._hourly.values())
+ total_tokens = sum(int(d.get("total_tokens", 0) or 0) for d in self._hourly.values())
+ input_tokens = sum(int(d.get("input_tokens", 0) or 0) for d in self._hourly.values())
+ output_tokens = sum(int(d.get("output_tokens", 0) or 0) for d in self._hourly.values())
+ reasoning_tokens = sum(int(d.get("reasoning_tokens", 0) or 0) for d in self._hourly.values())
+ cached_tokens = sum(int(d.get("cached_tokens", 0) or 0) for d in self._hourly.values())
return {
"hourly": hourly_data,
@@ -189,7 +267,12 @@ def get_stats(self, hours: int = 24, days: int = 7) -> Dict[str, Any]:
"total": total_requests,
"success": total_success,
"failed": total_failed,
- "success_rate": round(total_success / total_requests * 100, 1) if total_requests > 0 else 0
+ "success_rate": round(total_success / total_requests * 100, 1) if total_requests > 0 else 0,
+ "total_tokens": total_tokens,
+ "input_tokens": input_tokens,
+ "output_tokens": output_tokens,
+ "reasoning_tokens": reasoning_tokens,
+ "cached_tokens": cached_tokens,
}
}
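For illustration: how the extended record_request accumulates token counters into the hourly and daily buckets. A sketch against the module-level request_stats singleton; the model id and counts are made up:

    import asyncio
    from app.services.request_stats import request_stats

    async def demo():
        await request_stats.record_request(
            "grok-3",  # illustrative model id
            success=True,
            total_tokens=120,
            input_tokens=100,
            output_tokens=20,
            reasoning_tokens=5,
            cached_tokens=0,
        )
        summary = request_stats.get_stats()["summary"]
        # The summary sums the token counters across the retained hourly buckets.
        print(summary["total_tokens"], summary["input_tokens"], summary["output_tokens"])

    asyncio.run(demo())

Note that record_request persists to data/stats.json, so the sketch has side effects if run against a real deployment.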
diff --git a/app/services/token_usage.py b/app/services/token_usage.py
new file mode 100644
index 0000000..fc91651
--- /dev/null
+++ b/app/services/token_usage.py
@@ -0,0 +1,110 @@
+"""Token usage estimation utilities (no external tokenizer)."""
+
+from __future__ import annotations
+
+from typing import Any, Dict, List, Tuple
+
+
+def _count_tokens(text: str) -> int:
+ if not text:
+ return 0
+ ascii_count = 0
+ non_ascii = 0
+ for ch in text:
+ if ord(ch) <= 0x7F:
+ ascii_count += 1
+ else:
+ non_ascii += 1
+ return (ascii_count + 3) // 4 + non_ascii
+
+
+def _split_think(text: str) -> Tuple[str, str]:
+ if not text:
+ return "", ""
+ reasoning_parts: List[str] = []
+ output = text
+ start = 0
+ while True:
+ s = output.find("", start)
+ if s < 0:
+ break
+ e = output.find("", s + 7)
+ if e < 0:
+ break
+ reasoning_parts.append(output[s + 7 : e])
+ output = output[:s] + output[e + 8 :]
+ start = s
+ return "\n".join(reasoning_parts), output
+
+
+def estimate_prompt_tokens(messages: List[Dict[str, Any]]) -> Dict[str, int]:
+ text_parts: List[str] = []
+ image_tokens = 0
+ for msg in messages or []:
+ content = msg.get("content")
+ if isinstance(content, list):
+ for item in content:
+ if isinstance(item, dict) and item.get("type") == "text":
+ t = str(item.get("text") or "")
+ if t.strip():
+ text_parts.append(t)
+ else:
+ t = str(content or "")
+ if t.strip():
+ text_parts.append(t)
+ text_tokens = _count_tokens("\n".join(text_parts))
+ return {
+ "text_tokens": text_tokens,
+ "image_tokens": image_tokens,
+ "prompt_tokens": text_tokens + image_tokens,
+ }
+
+
+def build_chat_usage(messages: List[Dict[str, Any]], completion_text: str) -> Dict[str, Any]:
+ prompt = estimate_prompt_tokens(messages)
+ reasoning_text, output_text = _split_think(completion_text or "")
+ completion_text_tokens = _count_tokens(output_text)
+ reasoning_tokens = _count_tokens(reasoning_text)
+ output_tokens = completion_text_tokens + reasoning_tokens
+ input_tokens = prompt["prompt_tokens"]
+ total_tokens = input_tokens + output_tokens
+ return {
+ "prompt_tokens": input_tokens,
+ "completion_tokens": output_tokens,
+ "total_tokens": total_tokens,
+ "prompt_tokens_details": {
+ "cached_tokens": 0,
+ "text_tokens": prompt["text_tokens"],
+ "audio_tokens": 0,
+ "image_tokens": prompt["image_tokens"],
+ },
+ "completion_tokens_details": {
+ "text_tokens": completion_text_tokens,
+ "audio_tokens": 0,
+ "reasoning_tokens": reasoning_tokens,
+ },
+ "_raw": {
+ "reasoning_tokens": reasoning_tokens,
+ "cached_tokens": 0,
+ "input_tokens": input_tokens,
+ "output_tokens": output_tokens,
+ },
+ }
+
+
+def build_image_usage(prompt: str, success_count: int = 1) -> Dict[str, Any]:
+ text_tokens = _count_tokens(str(prompt or ""))
+ success = max(1, int(success_count or 1))
+ total_tokens = text_tokens * success
+ return {
+ "total_tokens": total_tokens,
+ "input_tokens": total_tokens,
+ "output_tokens": 0,
+ "input_tokens_details": {"text_tokens": text_tokens, "image_tokens": 0},
+ "_raw": {
+ "reasoning_tokens": 0,
+ "cached_tokens": 0,
+ "input_tokens": total_tokens,
+ "output_tokens": 0,
+ },
+ }
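A quick worked example of the estimator, which is deliberately tokenizer-free: roughly one token per four ASCII characters, one token per non-ASCII character, and <think>...</think> spans attributed to reasoning:

    from app.services.token_usage import _count_tokens, build_chat_usage

    assert _count_tokens("hello world") == 3  # 11 ASCII chars -> (11 + 3) // 4
    assert _count_tokens("你好") == 2          # 2 non-ASCII chars count as 2 tokens

    messages = [{"role": "user", "content": "hello world"}]
    completion = "<think>first reason a bit</think>The answer."
    usage = build_chat_usage(messages, completion)
    assert usage["prompt_tokens"] == 3
    assert usage["completion_tokens_details"]["reasoning_tokens"] == 5  # "first reason a bit"
    assert usage["completion_tokens"] == 8   # 5 reasoning + 3 visible text
    assert usage["total_tokens"] == 11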
diff --git a/app/static/datacenter/datacenter.html b/app/static/datacenter/datacenter.html
index 6b1fc8d..c0c03d1 100644
--- a/app/static/datacenter/datacenter.html
+++ b/app/static/datacenter/datacenter.html
@@ -88,6 +88,29 @@
数据中心
+
+
diff --git a/app/static/datacenter/datacenter.js b/app/static/datacenter/datacenter.js
index 2d3087c..383cfc6 100644
--- a/app/static/datacenter/datacenter.js
+++ b/app/static/datacenter/datacenter.js
@@ -108,6 +108,11 @@ function updateMetricsUI(data) {
setText('m-req-success', safeNum(sum.success).toLocaleString());
setText('m-req-failed', safeNum(sum.failed).toLocaleString());
setText('m-success-rate', formatPercent(safeNum(sum.success_rate)));
+ setText('m-total-tokens', safeNum(sum.total_tokens).toLocaleString());
+ setText('m-input-tokens', safeNum(sum.input_tokens).toLocaleString());
+ setText('m-output-tokens', safeNum(sum.output_tokens).toLocaleString());
+ setText('m-reasoning-tokens', safeNum(sum.reasoning_tokens).toLocaleString());
+ setText('m-cached-tokens', safeNum(sum.cached_tokens).toLocaleString());
const cache = data.cache || {};
const li = cache.local_image || { count: 0, size_mb: 0 };
diff --git a/migrations/0001_init.sql b/migrations/0001_init.sql
index fa959e9..c706bbd 100644
--- a/migrations/0001_init.sql
+++ b/migrations/0001_init.sql
@@ -51,6 +51,11 @@ CREATE TABLE IF NOT EXISTS request_logs (
status INTEGER NOT NULL,
key_name TEXT NOT NULL,
token_suffix TEXT NOT NULL,
+ total_tokens INTEGER NOT NULL DEFAULT 0,
+ input_tokens INTEGER NOT NULL DEFAULT 0,
+ output_tokens INTEGER NOT NULL DEFAULT 0,
+ reasoning_tokens INTEGER NOT NULL DEFAULT 0,
+ cached_tokens INTEGER NOT NULL DEFAULT 0,
error TEXT NOT NULL
);
diff --git a/migrations/0006_request_logs_usage.sql b/migrations/0006_request_logs_usage.sql
new file mode 100644
index 0000000..7466432
--- /dev/null
+++ b/migrations/0006_request_logs_usage.sql
@@ -0,0 +1,7 @@
+-- Add token usage columns to request_logs
+
+ALTER TABLE request_logs ADD COLUMN total_tokens INTEGER NOT NULL DEFAULT 0;
+ALTER TABLE request_logs ADD COLUMN input_tokens INTEGER NOT NULL DEFAULT 0;
+ALTER TABLE request_logs ADD COLUMN output_tokens INTEGER NOT NULL DEFAULT 0;
+ALTER TABLE request_logs ADD COLUMN reasoning_tokens INTEGER NOT NULL DEFAULT 0;
+ALTER TABLE request_logs ADD COLUMN cached_tokens INTEGER NOT NULL DEFAULT 0;
\ No newline at end of file
diff --git a/src/grok/processor.ts b/src/grok/processor.ts
index f0cfb83..9200010 100644
--- a/src/grok/processor.ts
+++ b/src/grok/processor.ts
@@ -1,4 +1,5 @@
import type { GrokSettings, GlobalSettings } from "../settings";
+import { buildChatUsageFromTexts, estimateInputTokensFromMessages, estimateTokens } from "../utils/token_usage";
type GrokNdjson = Record<string, unknown>;
@@ -121,7 +122,12 @@ export function createOpenAiStreamFromGrokNdjson(
settings: GrokSettings;
global: GlobalSettings;
origin: string;
- onFinish?: (result: { status: number; duration: number }) => Promise<void> | void;
+ promptMessages?: Array<{ content?: unknown }>;
+ onFinish?: (result: {
+ status: number;
+ duration: number;
+ usage?: ReturnType<typeof buildChatUsageFromTexts>;
+ }) => Promise<void> | void;
},
): ReadableStream {
const { settings, global, origin } = opts;
@@ -141,6 +147,9 @@ export function createOpenAiStreamFromGrokNdjson(
const chunkTimeoutMs = Math.max(0, (settings.stream_chunk_timeout ?? 120) * 1000);
const totalTimeoutMs = Math.max(0, (settings.stream_total_timeout ?? 600) * 1000);
+ const promptEst = estimateInputTokensFromMessages(opts.promptMessages ?? []);
+ let completionText = "";
+
return new ReadableStream({
async start(controller) {
const body = grokResp.body;
@@ -372,14 +381,55 @@ export function createOpenAiStreamFromGrokNdjson(
shouldSkip = true;
}
- if (!shouldSkip) controller.enqueue(encoder.encode(makeChunk(id, created, currentModel, content)));
+ if (!shouldSkip) {
+ completionText += content;
+ controller.enqueue(encoder.encode(makeChunk(id, created, currentModel, content)));
+ }
isThinking = currentIsThinking;
}
}
+ const usage = buildChatUsageFromTexts({
+ promptTextTokens: promptEst.textTokens,
+ promptImageTokens: promptEst.imageTokens,
+ completionText,
+ });
+ controller.enqueue(
+ encoder.encode(
+ `data: ${JSON.stringify({
+ id,
+ object: "chat.completion.chunk",
+ created,
+ model: currentModel,
+ choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
+ usage: {
+ prompt_tokens: usage.input_tokens,
+ completion_tokens: usage.output_tokens,
+ total_tokens: usage.total_tokens,
+ prompt_tokens_details: {
+ cached_tokens: usage.cached_tokens,
+ text_tokens: usage.input_tokens_details.text_tokens,
+ audio_tokens: 0,
+ image_tokens: usage.input_tokens_details.image_tokens,
+ },
+ completion_tokens_details: {
+ text_tokens: usage.output_tokens_details.text_tokens,
+ audio_tokens: 0,
+ reasoning_tokens: usage.reasoning_tokens,
+ },
+ },
+ })}\n\n`,
+ ),
+ );
controller.enqueue(encoder.encode(makeChunk(id, created, currentModel, "", "stop")));
controller.enqueue(encoder.encode(makeDone()));
- if (opts.onFinish) await opts.onFinish({ status: finalStatus, duration: (Date.now() - startTime) / 1000 });
+ if (opts.onFinish) {
+ await opts.onFinish({
+ status: finalStatus,
+ duration: (Date.now() - startTime) / 1000,
+ usage,
+ });
+ }
controller.close();
} catch (e) {
finalStatus = 500;
@@ -389,7 +439,9 @@ export function createOpenAiStreamFromGrokNdjson(
),
);
controller.enqueue(encoder.encode(makeDone()));
- if (opts.onFinish) await opts.onFinish({ status: finalStatus, duration: (Date.now() - startTime) / 1000 });
+ if (opts.onFinish) {
+ await opts.onFinish({ status: finalStatus, duration: (Date.now() - startTime) / 1000 });
+ }
controller.close();
} finally {
try {
@@ -404,7 +456,14 @@ export function createOpenAiStreamFromGrokNdjson(
export async function parseOpenAiFromGrokNdjson(
grokResp: Response,
- opts: { cookie: string; settings: GrokSettings; global: GlobalSettings; origin: string; requestedModel: string },
+ opts: {
+ cookie: string;
+ settings: GrokSettings;
+ global: GlobalSettings;
+ origin: string;
+ requestedModel: string;
+ promptMessages?: Array<{ content?: unknown }>;
+ },
): Promise<Record<string, unknown>> {
const { global, origin, requestedModel, settings } = opts;
const text = await grokResp.text();
@@ -471,6 +530,13 @@ export async function parseOpenAiFromGrokNdjson(
break;
}
+ const promptEst = estimateInputTokensFromMessages(opts.promptMessages ?? []);
+ const usage = buildChatUsageFromTexts({
+ promptTextTokens: promptEst.textTokens,
+ promptImageTokens: promptEst.imageTokens,
+ completionText: content,
+ });
+
return {
id: `chatcmpl-${crypto.randomUUID()}`,
object: "chat.completion",
@@ -483,6 +549,21 @@ export async function parseOpenAiFromGrokNdjson(
finish_reason: "stop",
},
],
- usage: null,
+ usage: {
+ prompt_tokens: usage.input_tokens,
+ completion_tokens: usage.output_tokens,
+ total_tokens: usage.total_tokens,
+ prompt_tokens_details: {
+ cached_tokens: usage.cached_tokens,
+ text_tokens: usage.input_tokens_details.text_tokens,
+ audio_tokens: 0,
+ image_tokens: usage.input_tokens_details.image_tokens,
+ },
+ completion_tokens_details: {
+ text_tokens: usage.output_tokens_details.text_tokens,
+ audio_tokens: 0,
+ reasoning_tokens: usage.reasoning_tokens,
+ },
+ },
};
}
diff --git a/src/repo/logs.ts b/src/repo/logs.ts
index 08e1502..1db5b64 100644
--- a/src/repo/logs.ts
+++ b/src/repo/logs.ts
@@ -1,5 +1,5 @@
import type { Env } from "../env";
-import { dbAll, dbRun } from "../db";
+import { dbAll, dbFirst, dbRun } from "../db";
import { nowMs, formatUtcMs } from "../utils/time";
export interface RequestLogRow {
@@ -12,19 +12,29 @@ export interface RequestLogRow {
status: number;
key_name: string;
token_suffix: string;
+ total_tokens: number;
+ input_tokens: number;
+ output_tokens: number;
+ reasoning_tokens: number;
+ cached_tokens: number;
error: string;
}
+type RequestLogInsert = Omit &
+ Partial> & {
+ id?: string;
+ };
+
export async function addRequestLog(
db: Env["DB"],
- entry: Omit & { id?: string },
+ entry: RequestLogInsert,
): Promise {
const ts = nowMs();
const id = entry.id ?? String(ts);
const time = formatUtcMs(ts);
await dbRun(
db,
- "INSERT INTO request_logs(id,time,timestamp,ip,model,duration,status,key_name,token_suffix,error) VALUES(?,?,?,?,?,?,?,?,?,?)",
+ "INSERT INTO request_logs(id,time,timestamp,ip,model,duration,status,key_name,token_suffix,total_tokens,input_tokens,output_tokens,reasoning_tokens,cached_tokens,error) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
[
id,
time,
@@ -35,6 +45,11 @@ export async function addRequestLog(
entry.status,
entry.key_name,
entry.token_suffix,
+ (entry as any).total_tokens ?? 0,
+ (entry as any).input_tokens ?? 0,
+ (entry as any).output_tokens ?? 0,
+ (entry as any).reasoning_tokens ?? 0,
+ (entry as any).cached_tokens ?? 0,
entry.error,
],
);
@@ -43,7 +58,7 @@ export async function addRequestLog(
export async function getRequestLogs(db: Env["DB"], limit = 1000): Promise {
return dbAll(
db,
- "SELECT id,time,timestamp,ip,model,duration,status,key_name,token_suffix,error FROM request_logs ORDER BY timestamp DESC LIMIT ?",
+ "SELECT id,time,timestamp,ip,model,duration,status,key_name,token_suffix,total_tokens,input_tokens,output_tokens,reasoning_tokens,cached_tokens,error FROM request_logs ORDER BY timestamp DESC LIMIT ?",
[limit],
);
}
@@ -56,7 +71,17 @@ export interface RequestStats {
hourly: Array<{ hour: string; success: number; failed: number }>;
daily: Array<{ date: string; success: number; failed: number }>;
models: Array<{ model: string; count: number }>;
- summary: { total: number; success: number; failed: number; success_rate: number };
+ summary: {
+ total: number;
+ success: number;
+ failed: number;
+ success_rate: number;
+ total_tokens: number;
+ input_tokens: number;
+ output_tokens: number;
+ reasoning_tokens: number;
+ cached_tokens: number;
+ };
}
function isSuccessStatus(status: number): boolean {
@@ -152,5 +177,32 @@ export async function getRequestStats(db: Env["DB"]): Promise {
const total = success + failed;
const success_rate = total > 0 ? Math.round((success / total) * 1000) / 10 : 0;
- return { hourly, daily, models, summary: { total, success, failed, success_rate } };
+ const tokenSumRow = await dbFirst<{
+ total_tokens: number;
+ input_tokens: number;
+ output_tokens: number;
+ reasoning_tokens: number;
+ cached_tokens: number;
+ }>(
+ db,
+ "SELECT SUM(total_tokens) as total_tokens, SUM(input_tokens) as input_tokens, SUM(output_tokens) as output_tokens, SUM(reasoning_tokens) as reasoning_tokens, SUM(cached_tokens) as cached_tokens FROM request_logs WHERE timestamp >= ?",
+ [since24h],
+ );
+
+ return {
+ hourly,
+ daily,
+ models,
+ summary: {
+ total,
+ success,
+ failed,
+ success_rate,
+ total_tokens: tokenSumRow?.total_tokens ?? 0,
+ input_tokens: tokenSumRow?.input_tokens ?? 0,
+ output_tokens: tokenSumRow?.output_tokens ?? 0,
+ reasoning_tokens: tokenSumRow?.reasoning_tokens ?? 0,
+ cached_tokens: tokenSumRow?.cached_tokens ?? 0,
+ },
+ };
}
diff --git a/src/routes/admin.ts b/src/routes/admin.ts
index db194bd..f323e35 100644
--- a/src/routes/admin.ts
+++ b/src/routes/admin.ts
@@ -1260,6 +1260,11 @@ adminRoutes.post("/api/logs/add", requireAdminAuth, async (c) => {
status: Number(body.status ?? 200),
key_name: "admin",
token_suffix: "",
+ total_tokens: 0,
+ input_tokens: 0,
+ output_tokens: 0,
+ reasoning_tokens: 0,
+ cached_tokens: 0,
error: String(body.error ?? ""),
});
return c.json({ success: true });
diff --git a/src/routes/openai.ts b/src/routes/openai.ts
index b075b15..62a5136 100644
--- a/src/routes/openai.ts
+++ b/src/routes/openai.ts
@@ -9,6 +9,7 @@ import { uploadImage } from "../grok/upload";
import { getDynamicHeaders } from "../grok/headers";
import { createMediaPost, createPost } from "../grok/create";
import { createOpenAiStreamFromGrokNdjson, parseOpenAiFromGrokNdjson } from "../grok/processor";
+import { buildChatUsageFromTexts, buildImageUsageFromPrompt, estimateInputTokensFromMessages, estimateTokens } from "../utils/token_usage";
import {
IMAGE_METHOD_IMAGINE_WS_EXPERIMENTAL,
generateImagineWs,
@@ -26,6 +27,9 @@ import { nowMs } from "../utils/time";
import { arrayBufferToBase64 } from "../utils/base64";
import { upsertCacheRow } from "../repo/cache";
+const IMAGE_GENERATION_MODEL_ID = "grok-imagine-1.0";
+const IMAGE_EDIT_MODEL_ID = "grok-imagine-1.0-edit";
+
function openAiError(message: string, code: string): Record {
return { error: { message, type: "invalid_request_error", code } };
}
@@ -360,6 +364,7 @@ function createImageEventStream(args: {
cookie: string;
settings: Awaited>["grok"];
n: number;
+ prompt: string;
onFinish?: (result: { status: number; duration: number }) => Promise | void;
}): ReadableStream {
const encoder = new TextEncoder();
@@ -446,6 +451,7 @@ function createImageEventStream(args: {
}
}
+ const usageBase = buildImageUsageFromPrompt(args.prompt);
for (let i = 0; i < finalImages.length; i++) {
if (args.n === 1 && i !== targetIndex) continue;
const outIndex = args.n === 1 ? 0 : i;
@@ -456,10 +462,10 @@ function createImageEventStream(args: {
[responseField]: finalImages[i] ?? "",
index: outIndex,
usage: {
- total_tokens: 50,
- input_tokens: 25,
- output_tokens: 25,
- input_tokens_details: { text_tokens: 5, image_tokens: 20 },
+ total_tokens: usageBase.total_tokens,
+ input_tokens: usageBase.input_tokens,
+ output_tokens: usageBase.output_tokens,
+ input_tokens_details: usageBase.input_tokens_details,
},
}),
),
@@ -495,29 +501,49 @@ function getTokenSuffix(token: string): string {
return token.length >= 6 ? token.slice(-6) : token;
}
-const IMAGE_GENERATION_MODEL_ID = "grok-imagine-1.0";
-const IMAGE_EDIT_MODEL_ID = "grok-imagine-1.0-edit";
-
-function parseImageCount(input: unknown): number {
- const raw = Number(input ?? 1);
- if (!Number.isFinite(raw)) return 1;
- return Math.max(1, Math.min(10, Math.floor(raw)));
+function parseImageModel(input: unknown, fallback: string): string {
+ return String(input ?? fallback).trim() || fallback;
}
function parseImagePrompt(input: unknown): string {
- return String(input ?? "").trim();
+ if (input === undefined || input === null) return "";
+ if (typeof input === "string") return input.trim();
+ if (Array.isArray(input)) return input.map((v) => String(v ?? "")).join(" ").trim();
+ return String(input).trim();
}
-function parseImageModel(input: unknown, fallback: string): string {
- return String(input ?? fallback).trim() || fallback;
+function parseImageCount(input: unknown): number {
+ const raw = Number(input);
+ if (!Number.isFinite(raw)) return 1;
+ const value = Math.floor(raw);
+ return Math.max(1, Math.min(10, value));
}
-function parseImageStream(input: unknown): boolean {
- return toBool(input);
+function parseImageSize(input: unknown): string {
+ const value = String(input ?? "").trim().toLowerCase();
+ if (!value) return "1024x1024";
+ const allowed = new Set([
+ "256x256",
+ "512x512",
+ "1024x1024",
+ "1024x576",
+ "1280x720",
+ "1536x864",
+ "576x1024",
+ "720x1280",
+ "864x1536",
+ "1024x1536",
+ "512x768",
+ "768x1024",
+ "1536x1024",
+ "768x512",
+ "1024x768",
+ ]);
+ return allowed.has(value) ? value : "1024x1024";
}
-function parseImageSize(input: unknown): string {
- return String(input ?? "1024x1024").trim() || "1024x1024";
+function parseImageStream(input: unknown): boolean {
+ return toBool(input);
}
function parseImageConcurrencyOrError(
@@ -702,6 +728,7 @@ async function runExperimentalImageEditCall(args: {
function createSyntheticImageEventStream(args: {
selected: string[];
responseField: ImageResponseFormat;
+ prompt: string;
onFinish?: (result: { status: number; duration: number }) => Promise | void;
}): ReadableStream {
const encoder = new TextEncoder();
@@ -711,11 +738,11 @@ function createSyntheticImageEventStream(args: {
const startedAt = Date.now();
try {
let emitted = false;
+ const usageBase = buildImageUsageFromPrompt(args.prompt);
for (let i = 0; i < args.selected.length; i++) {
const value = args.selected[i];
if (!value || value === "error") continue;
emitted = true;
-
controller.enqueue(
encoder.encode(
buildImageSse("image_generation.partial_image", {
@@ -733,10 +760,10 @@ function createSyntheticImageEventStream(args: {
[args.responseField]: value,
index: i,
usage: {
- total_tokens: 50,
- input_tokens: 25,
- output_tokens: 25,
- input_tokens_details: { text_tokens: 5, image_tokens: 20 },
+ total_tokens: usageBase.total_tokens,
+ input_tokens: usageBase.input_tokens,
+ output_tokens: usageBase.output_tokens,
+ input_tokens_details: usageBase.input_tokens_details,
},
}),
),
@@ -778,6 +805,7 @@ function createSyntheticImageEventStream(args: {
function createStreamErrorImageEventStream(args: {
message: string;
responseField: ImageResponseFormat;
+ prompt: string;
onFinish?: (result: { status: number; duration: number }) => Promise | void;
}): ReadableStream {
const encoder = new TextEncoder();
@@ -793,6 +821,7 @@ function createStreamErrorImageEventStream(args: {
}),
),
);
+ const usageBase = buildImageUsageFromPrompt(args.prompt);
controller.enqueue(
encoder.encode(
buildImageSse("image_generation.completed", {
@@ -803,7 +832,7 @@ function createStreamErrorImageEventStream(args: {
total_tokens: 0,
input_tokens: 0,
output_tokens: 0,
- input_tokens_details: { text_tokens: 0, image_tokens: 0 },
+ input_tokens_details: usageBase.input_tokens_details,
},
}),
),
@@ -858,6 +887,7 @@ function createExperimentalImageEventStream(args: {
);
};
+ const usageBase = buildImageUsageFromPrompt(args.prompt);
const emitCompleted = (index: number, value: string) => {
if (index < 0 || index >= safeN) return;
if (completedByIndex.has(index)) return;
@@ -871,12 +901,12 @@ function createExperimentalImageEventStream(args: {
[args.responseField]: finalValue,
index,
usage: {
- total_tokens: isError ? 0 : 50,
- input_tokens: isError ? 0 : 25,
- output_tokens: isError ? 0 : 25,
+ total_tokens: isError ? 0 : usageBase.total_tokens,
+ input_tokens: isError ? 0 : usageBase.input_tokens,
+ output_tokens: isError ? 0 : usageBase.output_tokens,
input_tokens_details: {
- text_tokens: isError ? 0 : 5,
- image_tokens: isError ? 0 : 20,
+ text_tokens: isError ? 0 : usageBase.input_tokens_details.text_tokens,
+ image_tokens: isError ? 0 : usageBase.input_tokens_details.image_tokens,
},
},
}),
@@ -1026,12 +1056,14 @@ function invalidStreamNMessage(): string {
return "Streaming is only supported when n=1 or n=2";
}
-function imageUsagePayload(values: string[]) {
+function imageUsagePayload(values: string[], prompt: string) {
+ const base = buildImageUsageFromPrompt(prompt);
+ const successCount = values.filter((v) => v !== "error").length;
return {
- total_tokens: 0 * values.filter((v) => v !== "error").length,
- input_tokens: 0,
- output_tokens: 0 * values.filter((v) => v !== "error").length,
- input_tokens_details: { text_tokens: 0, image_tokens: 0 },
+ total_tokens: base.total_tokens * Math.max(1, successCount),
+ input_tokens: base.input_tokens * Math.max(1, successCount),
+ output_tokens: base.output_tokens * Math.max(1, successCount),
+ input_tokens_details: base.input_tokens_details,
};
}
@@ -1039,11 +1071,11 @@ function createdTs(): number {
return Math.floor(Date.now() / 1000);
}
-function buildImageJsonPayload(field: ImageResponseFormat, values: string[]) {
+function buildImageJsonPayload(field: ImageResponseFormat, values: string[], prompt: string) {
return {
created: createdTs(),
data: imageResponseData(field, values),
- usage: imageUsagePayload(values),
+ usage: imageUsagePayload(values, prompt),
};
}
@@ -1056,8 +1088,10 @@ async function recordImageLog(args: {
status: number;
tokenSuffix?: string;
error: string;
+ prompt?: string;
}) {
const duration = (Date.now() - args.start) / 1000;
+ const usage = args.prompt ? buildImageUsageFromPrompt(args.prompt) : null;
await addRequestLog(args.env.DB, {
ip: args.ip,
model: args.model,
@@ -1065,6 +1099,11 @@ async function recordImageLog(args: {
status: args.status,
key_name: args.keyName,
token_suffix: args.tokenSuffix ?? "",
+ total_tokens: usage?.total_tokens ?? 0,
+ input_tokens: usage?.input_tokens ?? 0,
+ output_tokens: usage?.output_tokens ?? 0,
+ reasoning_tokens: 0,
+ cached_tokens: 0,
error: args.error,
});
}
@@ -1250,6 +1289,7 @@ openAiRoutes.post("/chat/completions", async (c) => {
const { content, images } = extractContent(body.messages as any);
const isVideoModel = Boolean(cfg.is_video_model);
const imgInputs = isVideoModel && images.length > 1 ? images.slice(0, 1) : images;
+ const promptMessages = body.messages as Array<{ content?: unknown }>;
try {
const uploads = await mapLimit(imgInputs, 5, (u) => uploadImage(u, cookie, settingsBundle.grok));
@@ -1303,7 +1343,14 @@ openAiRoutes.post("/chat/completions", async (c) => {
settings: settingsBundle.grok,
global: settingsBundle.global,
origin,
- onFinish: async ({ status, duration }) => {
+ promptMessages,
+ onFinish: async ({ status, duration, usage }) => {
+ const promptEst = estimateInputTokensFromMessages(promptMessages);
+ const resolved = usage ?? buildChatUsageFromTexts({
+ promptTextTokens: promptEst.textTokens,
+ promptImageTokens: promptEst.imageTokens,
+ completionText: "",
+ });
await addRequestLog(c.env.DB, {
ip,
model: requestedModel,
@@ -1311,6 +1358,11 @@ openAiRoutes.post("/chat/completions", async (c) => {
status,
key_name: keyName,
token_suffix: jwt.slice(-6),
+ total_tokens: resolved.total_tokens,
+ input_tokens: resolved.input_tokens,
+ output_tokens: resolved.output_tokens,
+ reasoning_tokens: resolved.reasoning_tokens,
+ cached_tokens: resolved.cached_tokens,
error: status === 200 ? "" : "stream_error",
});
},
@@ -1334,9 +1386,12 @@ openAiRoutes.post("/chat/completions", async (c) => {
global: settingsBundle.global,
origin,
requestedModel,
+ promptMessages,
});
const duration = (Date.now() - start) / 1000;
+ const usage = (json as any).usage as any;
+ const raw = usage?.completion_tokens_details ? usage : null;
await addRequestLog(c.env.DB, {
ip,
model: requestedModel,
@@ -1344,6 +1399,11 @@ openAiRoutes.post("/chat/completions", async (c) => {
status: 200,
key_name: keyName,
token_suffix: jwt.slice(-6),
+ total_tokens: Number(raw?.total_tokens ?? 0),
+ input_tokens: Number(raw?.prompt_tokens ?? 0),
+ output_tokens: Number(raw?.completion_tokens ?? 0),
+ reasoning_tokens: Number(raw?.completion_tokens_details?.reasoning_tokens ?? 0),
+ cached_tokens: Number(raw?.prompt_tokens_details?.cached_tokens ?? 0),
error: "",
});
@@ -1365,6 +1425,11 @@ openAiRoutes.post("/chat/completions", async (c) => {
status: 500,
key_name: keyName,
token_suffix: "",
+ total_tokens: 0,
+ input_tokens: 0,
+ output_tokens: 0,
+ reasoning_tokens: 0,
+ cached_tokens: 0,
error: lastErr ?? "unknown_error",
});
@@ -1378,6 +1443,11 @@ openAiRoutes.post("/chat/completions", async (c) => {
status: 500,
key_name: keyName,
token_suffix: "",
+ total_tokens: 0,
+ input_tokens: 0,
+ output_tokens: 0,
+ reasoning_tokens: 0,
+ cached_tokens: 0,
error: e instanceof Error ? e.message : String(e),
});
return c.json(openAiError("Internal error", "internal_error"), 500);
@@ -1390,6 +1460,7 @@ openAiRoutes.post("/images/generations", async (c) => {
const keyName = c.get("apiAuth").name ?? "Unknown";
const origin = new URL(c.req.url).origin;
+ let prompt = "";
let requestedModel = IMAGE_GENERATION_MODEL_ID;
try {
const body = (await c.req.json()) as {
@@ -1401,7 +1472,7 @@ openAiRoutes.post("/images/generations", async (c) => {
stream?: unknown;
response_format?: unknown;
};
- const prompt = parseImagePrompt(body.prompt);
+ prompt = parseImagePrompt(body.prompt);
const promptErr = nonEmptyPromptOrError(prompt);
if (promptErr) return c.json(openAiError(promptErr.message, promptErr.code), 400);
@@ -1475,6 +1546,11 @@ openAiRoutes.post("/images/generations", async (c) => {
status,
key_name: keyName,
token_suffix: getTokenSuffix(experimentalToken.token),
+ total_tokens: 0,
+ input_tokens: 0,
+ output_tokens: 0,
+ reasoning_tokens: 0,
+ cached_tokens: 0,
error: status === 200 ? "" : "stream_error",
});
},
@@ -1493,11 +1569,13 @@ openAiRoutes.post("/images/generations", async (c) => {
keyName,
status: 503,
error: "NO_AVAILABLE_TOKEN",
+ prompt: imageCallPrompt("generation", prompt),
});
return new Response(
createStreamErrorImageEventStream({
message: "No available token",
responseField,
+ prompt: imageCallPrompt("generation", prompt),
}),
{ status: 200, headers: streamHeaders() },
);
@@ -1524,6 +1602,7 @@ openAiRoutes.post("/images/generations", async (c) => {
status: upstream.status,
tokenSuffix: getTokenSuffix(chosen.token),
error: txt.slice(0, 200),
+ prompt: imageCallPrompt("generation", prompt),
});
return new Response(
createStreamErrorImageEventStream({
@@ -1531,6 +1610,7 @@ openAiRoutes.post("/images/generations", async (c) => {
? txt.slice(0, 500)
: `Upstream ${upstream.status}`,
responseField,
+ prompt: imageCallPrompt("generation", prompt),
}),
{ status: 200, headers: streamHeaders() },
);
@@ -1543,6 +1623,7 @@ openAiRoutes.post("/images/generations", async (c) => {
cookie,
settings: settingsBundle.grok,
n,
+ prompt: imageCallPrompt("generation", prompt),
onFinish: async ({ status, duration }) => {
await addRequestLog(c.env.DB, {
ip,
@@ -1551,6 +1632,11 @@ openAiRoutes.post("/images/generations", async (c) => {
status,
key_name: keyName,
token_suffix: getTokenSuffix(chosen.token),
+ total_tokens: 0,
+ input_tokens: 0,
+ output_tokens: 0,
+ reasoning_tokens: 0,
+ cached_tokens: 0,
error: status === 200 ? "" : "stream_error",
});
},
@@ -1583,8 +1669,9 @@ openAiRoutes.post("/images/generations", async (c) => {
status: 200,
tokenSuffix: getTokenSuffix(experimentalToken.token),
error: "",
+ prompt: imageCallPrompt("generation", prompt),
});
- return c.json(buildImageJsonPayload(responseField, selected));
+ return c.json(buildImageJsonPayload(responseField, selected, imageCallPrompt("generation", prompt)));
} catch (e) {
const msg = e instanceof Error ? e.message : String(e);
await recordTokenFailure(c.env.DB, experimentalToken.token, 500, msg.slice(0, 200));
@@ -1599,26 +1686,26 @@ openAiRoutes.post("/images/generations", async (c) => {
Array.from({ length: calls }),
Math.min(calls, Math.max(1, concurrency)),
async () => {
- const chosen = await selectBestToken(c.env.DB, requestedModel);
- if (!chosen) throw new Error("No available token");
- const cookie = buildCookie(chosen.token, cf);
- try {
- return await runImageCall({
- requestModel: requestedModel,
- prompt: imageCallPrompt("generation", prompt),
- fileIds: [],
- cookie,
- settings: settingsBundle.grok,
- responseFormat,
- baseUrl,
- });
- } catch (e) {
- const msg = e instanceof Error ? e.message : String(e);
- await recordTokenFailure(c.env.DB, chosen.token, 500, msg.slice(0, 200));
- await applyCooldown(c.env.DB, chosen.token, 500);
- throw e;
- }
- },
+ const chosen = await selectBestToken(c.env.DB, requestedModel);
+ if (!chosen) throw new Error("No available token");
+ const cookie = buildCookie(chosen.token, cf);
+ try {
+ return await runImageCall({
+ requestModel: requestedModel,
+ prompt: imageCallPrompt("generation", prompt),
+ fileIds: [],
+ cookie,
+ settings: settingsBundle.grok,
+ responseFormat,
+ baseUrl,
+ });
+ } catch (e) {
+ const msg = e instanceof Error ? e.message : String(e);
+ await recordTokenFailure(c.env.DB, chosen.token, 500, msg.slice(0, 200));
+ await applyCooldown(c.env.DB, chosen.token, 500);
+ throw e;
+ }
+ },
);
const urls = dedupeImages(urlsNested.flat().filter(Boolean));
const selected = pickImageResults(urls, n);
@@ -1631,9 +1718,10 @@ openAiRoutes.post("/images/generations", async (c) => {
keyName,
status: 200,
error: "",
+ prompt: imageCallPrompt("generation", prompt),
});
- return c.json(buildImageJsonPayload(responseField, selected));
+ return c.json(buildImageJsonPayload(responseField, selected, imageCallPrompt("generation", prompt)));
} catch (e) {
const message = e instanceof Error ? e.message : String(e);
if (isContentModerationMessage(message)) {
@@ -1645,6 +1733,7 @@ openAiRoutes.post("/images/generations", async (c) => {
keyName,
status: 400,
error: message,
+ prompt: imageCallPrompt("generation", prompt || ""),
});
return c.json(openAiError(message, "content_policy_violation"), 400);
}
@@ -1656,6 +1745,7 @@ openAiRoutes.post("/images/generations", async (c) => {
keyName,
status: 500,
error: message,
+ prompt: imageCallPrompt("generation", prompt || ""),
});
return c.json(openAiError(message || "Internal error", "internal_error"), 500);
}
@@ -1668,10 +1758,11 @@ openAiRoutes.post("/images/edits", async (c) => {
const origin = new URL(c.req.url).origin;
const maxImageBytes = 50 * 1024 * 1024;
+ let prompt = "";
let requestedModel = IMAGE_EDIT_MODEL_ID;
try {
const form = await c.req.formData();
- const prompt = parseImagePrompt(form.get("prompt"));
+ prompt = parseImagePrompt(form.get("prompt"));
const promptErr = nonEmptyPromptOrError(prompt);
if (promptErr) return c.json(openAiError(promptErr.message, promptErr.code), 400);
@@ -1728,11 +1819,13 @@ openAiRoutes.post("/images/edits", async (c) => {
keyName,
status: 503,
error: "NO_AVAILABLE_TOKEN",
+ prompt: imageCallPrompt("edit", prompt),
});
return new Response(
createStreamErrorImageEventStream({
message: "No available token",
responseField,
+ prompt: imageCallPrompt("edit", prompt),
}),
{ status: 200, headers: streamHeaders() },
);
@@ -1784,6 +1877,7 @@ openAiRoutes.post("/images/edits", async (c) => {
cookie,
settings: settingsBundle.grok,
n,
+ prompt: imageCallPrompt("edit", prompt),
onFinish: async ({ status, duration }) => {
await addRequestLog(c.env.DB, {
ip,
@@ -1792,6 +1886,11 @@ openAiRoutes.post("/images/edits", async (c) => {
status,
key_name: keyName,
token_suffix: getTokenSuffix(chosen.token),
+ total_tokens: 0,
+ input_tokens: 0,
+ output_tokens: 0,
+ reasoning_tokens: 0,
+ cached_tokens: 0,
error: status === 200 ? "" : "stream_error",
});
},
@@ -1825,6 +1924,7 @@ openAiRoutes.post("/images/edits", async (c) => {
status: upstream.status,
tokenSuffix: getTokenSuffix(chosen.token),
error: txt.slice(0, 200),
+ prompt: imageCallPrompt("edit", prompt),
});
return new Response(
createStreamErrorImageEventStream({
@@ -1832,6 +1932,7 @@ openAiRoutes.post("/images/edits", async (c) => {
? txt.slice(0, 500)
: `Upstream ${upstream.status}`,
responseField,
+ prompt: imageCallPrompt("edit", prompt),
}),
{ status: 200, headers: streamHeaders() },
);
@@ -1844,6 +1945,7 @@ openAiRoutes.post("/images/edits", async (c) => {
cookie,
settings: settingsBundle.grok,
n,
+ prompt: imageCallPrompt("edit", prompt),
onFinish: async ({ status, duration }) => {
await addRequestLog(c.env.DB, {
ip,
@@ -1852,6 +1954,11 @@ openAiRoutes.post("/images/edits", async (c) => {
status,
key_name: keyName,
token_suffix: getTokenSuffix(chosen.token),
+ total_tokens: 0,
+ input_tokens: 0,
+ output_tokens: 0,
+ reasoning_tokens: 0,
+ cached_tokens: 0,
error: status === 200 ? "" : "stream_error",
});
},
@@ -1885,8 +1992,9 @@ openAiRoutes.post("/images/edits", async (c) => {
status: 200,
tokenSuffix: getTokenSuffix(chosen.token),
error: "",
+ prompt: imageCallPrompt("edit", prompt),
});
- return c.json(buildImageJsonPayload(responseField, selected));
+ return c.json(buildImageJsonPayload(responseField, selected, imageCallPrompt("edit", prompt)));
} catch (e) {
const msg = e instanceof Error ? e.message : String(e);
await recordTokenFailure(c.env.DB, chosen.token, 500, msg.slice(0, 200));
@@ -1919,9 +2027,10 @@ openAiRoutes.post("/images/edits", async (c) => {
status: 200,
tokenSuffix: getTokenSuffix(chosen.token),
error: "",
+ prompt: imageCallPrompt("edit", prompt),
});
- return c.json(buildImageJsonPayload(responseField, selected));
+ return c.json(buildImageJsonPayload(responseField, selected, imageCallPrompt("edit", prompt)));
} catch (e) {
const message = e instanceof Error ? e.message : String(e);
if (isContentModerationMessage(message)) {
@@ -1933,6 +2042,7 @@ openAiRoutes.post("/images/edits", async (c) => {
keyName,
status: 400,
error: message,
+ prompt: imageCallPrompt("edit", prompt || ""),
});
return c.json(openAiError(message, "content_policy_violation"), 400);
}
@@ -1944,6 +2054,7 @@ openAiRoutes.post("/images/edits", async (c) => {
keyName,
status: 500,
error: message,
+ prompt: imageCallPrompt("edit", prompt || ""),
});
return c.json(openAiError(message || "Internal error", "internal_error"), 500);
}
diff --git a/src/utils/token_usage.ts b/src/utils/token_usage.ts
new file mode 100644
index 0000000..4d61d55
--- /dev/null
+++ b/src/utils/token_usage.ts
@@ -0,0 +1,93 @@
+export interface TokenUsageCounts {
+ total_tokens: number;
+ input_tokens: number;
+ output_tokens: number;
+ reasoning_tokens: number;
+ cached_tokens: number;
+ input_tokens_details: { text_tokens: number; image_tokens: number };
+ output_tokens_details: { text_tokens: number; reasoning_tokens: number };
+}
+
+export function estimateTokens(text: string): number {
+ const raw = String(text || "");
+ if (!raw) return 0;
+ let ascii = 0;
+ let nonAscii = 0;
+ for (const ch of raw) {
+ if (ch.charCodeAt(0) <= 0x7f) ascii += 1;
+ else nonAscii += 1;
+ }
+ const asciiTokens = Math.ceil(ascii / 4);
+ return asciiTokens + nonAscii;
+}
+
+export function splitThinkSegments(text: string): { reasoningText: string; outputText: string } {
+ const raw = String(text || "");
+ if (!raw) return { reasoningText: "", outputText: "" };
+ const regex = /<think>([\s\S]*?)<\/think>/gi;
+ const reasoningParts: string[] = [];
+ let output = raw;
+ let match: RegExpExecArray | null;
+ while ((match = regex.exec(raw))) {
+ if (match[1]) reasoningParts.push(match[1]);
+ }
+ output = output.replace(regex, "");
+ return { reasoningText: reasoningParts.join("\n"), outputText: output };
+}
+
+export function estimateInputTokensFromMessages(messages: Array<{ content?: unknown }>): {
+ textTokens: number;
+ imageTokens: number;
+ promptTokens: number;
+} {
+ const parts: string[] = [];
+ let imageTokens = 0;
+ for (const msg of messages || []) {
+ const content = (msg as any)?.content;
+ if (Array.isArray(content)) {
+ for (const item of content) {
+ if (item?.type === "text" && typeof item.text === "string") parts.push(item.text);
+ if (item?.type === "image_url") imageTokens += 0;
+ }
+ } else if (typeof content === "string") {
+ parts.push(content);
+ }
+ }
+ const textTokens = estimateTokens(parts.join("\n"));
+ return { textTokens, imageTokens, promptTokens: textTokens + imageTokens };
+}
+
+export function buildChatUsageFromTexts(args: {
+ promptTextTokens: number;
+ promptImageTokens: number;
+ completionText: string;
+}): TokenUsageCounts {
+ const { reasoningText, outputText } = splitThinkSegments(args.completionText);
+ const completionTextTokens = estimateTokens(outputText);
+ const reasoningTokens = estimateTokens(reasoningText);
+ const outputTokens = completionTextTokens + reasoningTokens;
+ const inputTokens = args.promptTextTokens + args.promptImageTokens;
+ const totalTokens = inputTokens + outputTokens;
+ return {
+ total_tokens: totalTokens,
+ input_tokens: inputTokens,
+ output_tokens: outputTokens,
+ reasoning_tokens: reasoningTokens,
+ cached_tokens: 0,
+ input_tokens_details: { text_tokens: args.promptTextTokens, image_tokens: args.promptImageTokens },
+ output_tokens_details: { text_tokens: completionTextTokens, reasoning_tokens: reasoningTokens },
+ };
+}
+
+export function buildImageUsageFromPrompt(prompt: string): TokenUsageCounts {
+ const inputTokens = estimateTokens(prompt || "");
+ return {
+ total_tokens: inputTokens,
+ input_tokens: inputTokens,
+ output_tokens: 0,
+ reasoning_tokens: 0,
+ cached_tokens: 0,
+ input_tokens_details: { text_tokens: inputTokens, image_tokens: 0 },
+ output_tokens_details: { text_tokens: 0, reasoning_tokens: 0 },
+ };
+}
\ No newline at end of file