From 8d54a017d77f8d015a49e9b75918fe666836edf5 Mon Sep 17 00:00:00 2001 From: ZIC143 <2022220052@email.szu.edu.cn> Date: Sat, 7 Feb 2026 21:05:05 +0800 Subject: [PATCH 1/5] feat: enhance token usage tracking in request logging and statistics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Added total_tokens, input_tokens, output_tokens, reasoning_tokens, and cached_tokens fields to RequestLog and RequestLogger classes. - Updated RequestStats to track and aggregate token usage metrics for hourly and daily statistics. - Modified database schema to include new token usage columns in request_logs table. - Implemented token usage estimation utilities for both chat and image prompts. - Enhanced OpenAI routes to log detailed token usage information during request processing. - Updated TypeScript utilities to support token estimation and usage tracking for image generation and editing. feat: 增强请求日志和统计中的 token 使用量跟踪 - 在 RequestLog 和 RequestLogger 类中添加了 total_tokens、input_tokens、output_tokens、reasoning_tokens 和 cached_tokens 字段。 - 更新 RequestStats 以跟踪和汇总每小时及每日 token 使用量指标。 - 修改数据库 schema,在 request_logs 表中新增 token 使用量相关列。 - 实现了适用于聊天和图像提示的 token 使用量估算工具。 - 增强 OpenAI 路由,在请求处理过程中记录详细的 token 使用信息。 - 更新 TypeScript 工具,支持图像生成和编辑的 token 估算与使用量跟踪。 --- app/api/v1/image.py | 66 +++++++++- app/services/grok/chat.py | 18 ++- app/services/grok/processor.py | 10 +- app/services/request_logger.py | 15 +++ app/services/request_stats.py | 99 +++++++++++++-- app/services/token_usage.py | 110 ++++++++++++++++ migrations/0001_init.sql | 5 + migrations/0006_request_logs_usage.sql | 7 + src/grok/processor.ts | 75 ++++++++++- src/repo/logs.ts | 55 +++++++- src/routes/openai.ts | 169 ++++++++++++++++--------- src/utils/token_usage.ts | 93 ++++++++++++++ 12 files changed, 630 insertions(+), 92 deletions(-) create mode 100644 app/services/token_usage.py create mode 100644 migrations/0006_request_logs_usage.sql create mode 100644 src/utils/token_usage.ts diff --git a/app/api/v1/image.py b/app/api/v1/image.py index f39cab5..241fd2c 100644 --- a/app/api/v1/image.py +++ b/app/api/v1/image.py @@ -29,6 +29,7 @@ from app.services.grok.processor import ImageCollectProcessor, ImageStreamProcessor from app.services.quota import enforce_daily_quota from app.services.request_stats import request_stats +from app.services.token_usage import build_image_usage from app.services.token import get_token_manager @@ -555,6 +556,23 @@ async def _record_request(model_id: str, success: bool): pass +async def _record_request_with_usage(model_id: str, success: bool, prompt: str, success_count: int = 1): + try: + usage = build_image_usage(prompt, success_count=success_count) + raw = usage.get("_raw") or {} + await request_stats.record_request( + model_id, + success=success, + total_tokens=int(usage.get("total_tokens", 0) or 0), + input_tokens=int(usage.get("input_tokens", 0) or 0), + output_tokens=int(usage.get("output_tokens", 0) or 0), + reasoning_tokens=int(raw.get("reasoning_tokens", 0) or 0), + cached_tokens=int(raw.get("cached_tokens", 0) or 0), + ) + except Exception: + pass + + async def _get_token_for_model(model_id: str): """获取指定模型可用 token,失败时抛出统一异常""" try: @@ -704,7 +722,12 @@ async def _wrapped_experimental_stream(): consume_on_fail=True, is_usage=True, ) - await _record_request(model_info.model_id, True) + await _record_request_with_usage( + model_info.model_id, + True, + f"Image Generation: {request.prompt}", + success_count=n, + ) else: await _record_request(model_info.model_id, False) except 
Exception: @@ -752,7 +775,12 @@ async def _wrapped_stream(): consume_on_fail=True, is_usage=True, ) - await _record_request(model_info.model_id, True) + await _record_request_with_usage( + model_info.model_id, + True, + f"Image Generation: {request.prompt}", + success_count=n, + ) else: await _record_request(model_info.model_id, False) except Exception: @@ -811,7 +839,15 @@ async def _wrapped_stream(): consume_on_fail=True, is_usage=True, ) - await _record_request(model_info.model_id, bool(success)) + if success: + await _record_request_with_usage( + model_info.model_id, + True, + f"Image Generation: {request.prompt}", + success_count=n, + ) + else: + await _record_request(model_info.model_id, False) except Exception: pass @@ -964,7 +1000,12 @@ async def _wrapped_experimental_stream(): consume_on_fail=True, is_usage=True, ) - await _record_request(model_info.model_id, True) + await _record_request_with_usage( + model_info.model_id, + True, + f"Image Edit: {edit_request.prompt}", + success_count=n, + ) else: await _record_request(model_info.model_id, False) except Exception: @@ -1015,7 +1056,12 @@ async def _wrapped_stream(): consume_on_fail=True, is_usage=True, ) - await _record_request(model_info.model_id, True) + await _record_request_with_usage( + model_info.model_id, + True, + f"Image Edit: {edit_request.prompt}", + success_count=n, + ) else: await _record_request(model_info.model_id, False) except Exception: @@ -1100,7 +1146,15 @@ async def _wrapped_stream(): consume_on_fail=True, is_usage=True, ) - await _record_request(model_info.model_id, bool(success)) + if success: + await _record_request_with_usage( + model_info.model_id, + True, + f"Image Edit: {edit_request.prompt}", + success_count=n, + ) + else: + await _record_request(model_info.model_id, False) except Exception: pass diff --git a/app/services/grok/chat.py b/app/services/grok/chat.py index 747d9a6..471d2fb 100644 --- a/app/services/grok/chat.py +++ b/app/services/grok/chat.py @@ -533,6 +533,7 @@ async def completions( # 处理响应 if is_stream: processor = StreamProcessor(model_name, token, think).process(response) + prompt_messages = [msg.model_dump() for msg in messages] async def _wrapped_stream(): completed = False @@ -553,10 +554,23 @@ async def _wrapped_stream(): return _wrapped_stream() - result = await CollectProcessor(model_name, token).process(response) + result = await CollectProcessor(model_name, token).process( + response, + prompt_messages=[msg.model_dump() for msg in messages], + ) try: + usage = result.get("usage") or {} + raw = usage.get("_raw") or {} await token_mgr.sync_usage(token, model_name, consume_on_fail=True, is_usage=True) - await request_stats.record_request(model_name, success=True) + await request_stats.record_request( + model_name, + success=True, + total_tokens=int(usage.get("total_tokens", 0) or 0), + input_tokens=int(usage.get("prompt_tokens", 0) or 0), + output_tokens=int(usage.get("completion_tokens", 0) or 0), + reasoning_tokens=int(raw.get("reasoning_tokens", 0) or 0), + cached_tokens=int(raw.get("cached_tokens", 0) or 0), + ) except Exception: pass return result diff --git a/app/services/grok/processor.py b/app/services/grok/processor.py index b50cb0a..f1c8183 100644 --- a/app/services/grok/processor.py +++ b/app/services/grok/processor.py @@ -11,6 +11,7 @@ from app.core.config import get_config from app.core.logger import logger from app.services.grok.assets import DownloadService +from app.services.token_usage import build_chat_usage ASSET_URL = "https://assets.grok.com/" @@ -211,7 +212,7 
@@ def __init__(self, model: str, token: str = ""): super().__init__(model, token) self.image_format = get_config("app.image_format", "url") - async def process(self, response: AsyncIterable[bytes]) -> dict[str, Any]: + async def process(self, response: AsyncIterable[bytes], prompt_messages: Optional[list[dict]] = None) -> dict[str, Any]: """处理并收集完整响应""" response_id = "" fingerprint = "" @@ -261,6 +262,7 @@ async def process(self, response: AsyncIterable[bytes]) -> dict[str, Any]: finally: await self.close() + usage = build_chat_usage(prompt_messages or [], content) return { "id": response_id, "object": "chat.completion", @@ -272,11 +274,7 @@ async def process(self, response: AsyncIterable[bytes]) -> dict[str, Any]: "message": {"role": "assistant", "content": content, "refusal": None, "annotations": []}, "finish_reason": "stop" }], - "usage": { - "prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, - "prompt_tokens_details": {"cached_tokens": 0, "text_tokens": 0, "audio_tokens": 0, "image_tokens": 0}, - "completion_tokens_details": {"text_tokens": 0, "audio_tokens": 0, "reasoning_tokens": 0} - } + "usage": usage } diff --git a/app/services/request_logger.py b/app/services/request_logger.py index 5272e42..839a1ad 100644 --- a/app/services/request_logger.py +++ b/app/services/request_logger.py @@ -21,6 +21,11 @@ class RequestLog: status: int key_name: str token_suffix: str + total_tokens: int = 0 + input_tokens: int = 0 + output_tokens: int = 0 + reasoning_tokens: int = 0 + cached_tokens: int = 0 error: str = "" class RequestLogger: @@ -95,6 +100,11 @@ async def add_log(self, status: int, key_name: str, token_suffix: str = "", + total_tokens: int = 0, + input_tokens: int = 0, + output_tokens: int = 0, + reasoning_tokens: int = 0, + cached_tokens: int = 0, error: str = ""): """添加日志""" if not self._loaded: @@ -115,6 +125,11 @@ async def add_log(self, "status": status, "key_name": key_name, "token_suffix": token_suffix, + "total_tokens": int(total_tokens or 0), + "input_tokens": int(input_tokens or 0), + "output_tokens": int(output_tokens or 0), + "reasoning_tokens": int(reasoning_tokens or 0), + "cached_tokens": int(cached_tokens or 0), "error": error } diff --git a/app/services/request_stats.py b/app/services/request_stats.py index 4718e48..f02429d 100644 --- a/app/services/request_stats.py +++ b/app/services/request_stats.py @@ -28,8 +28,26 @@ def __init__(self): self.file_path = Path(__file__).parents[2] / "data" / "stats.json" # 统计数据 - self._hourly: Dict[str, Dict[str, int]] = defaultdict(lambda: {"total": 0, "success": 0, "failed": 0}) - self._daily: Dict[str, Dict[str, int]] = defaultdict(lambda: {"total": 0, "success": 0, "failed": 0}) + self._hourly: Dict[str, Dict[str, int]] = defaultdict(lambda: { + "total": 0, + "success": 0, + "failed": 0, + "total_tokens": 0, + "input_tokens": 0, + "output_tokens": 0, + "reasoning_tokens": 0, + "cached_tokens": 0, + }) + self._daily: Dict[str, Dict[str, int]] = defaultdict(lambda: { + "total": 0, + "success": 0, + "failed": 0, + "total_tokens": 0, + "input_tokens": 0, + "output_tokens": 0, + "reasoning_tokens": 0, + "cached_tokens": 0, + }) self._models: Dict[str, int] = defaultdict(int) # 保留策略 @@ -61,10 +79,28 @@ async def _load_data(self): data = orjson.loads(content) # 恢复 defaultdict 结构 - self._hourly = defaultdict(lambda: {"total": 0, "success": 0, "failed": 0}) + self._hourly = defaultdict(lambda: { + "total": 0, + "success": 0, + "failed": 0, + "total_tokens": 0, + "input_tokens": 0, + "output_tokens": 0, + "reasoning_tokens": 0, + 
"cached_tokens": 0, + }) self._hourly.update(data.get("hourly", {})) - self._daily = defaultdict(lambda: {"total": 0, "success": 0, "failed": 0}) + self._daily = defaultdict(lambda: { + "total": 0, + "success": 0, + "failed": 0, + "total_tokens": 0, + "input_tokens": 0, + "output_tokens": 0, + "reasoning_tokens": 0, + "cached_tokens": 0, + }) self._daily.update(data.get("daily", {})) self._models = defaultdict(int) @@ -96,7 +132,16 @@ async def _save_data(self): except Exception as e: logger.error(f"[Stats] 保存数据失败: {e}") - async def record_request(self, model: str, success: bool) -> None: + async def record_request( + self, + model: str, + success: bool, + total_tokens: int = 0, + input_tokens: int = 0, + output_tokens: int = 0, + reasoning_tokens: int = 0, + cached_tokens: int = 0, + ) -> None: """记录一次请求""" if not self._loaded: await self.init() @@ -111,6 +156,11 @@ async def record_request(self, model: str, success: bool) -> None: self._hourly[hour_key]["success"] += 1 else: self._hourly[hour_key]["failed"] += 1 + self._hourly[hour_key]["total_tokens"] += int(total_tokens or 0) + self._hourly[hour_key]["input_tokens"] += int(input_tokens or 0) + self._hourly[hour_key]["output_tokens"] += int(output_tokens or 0) + self._hourly[hour_key]["reasoning_tokens"] += int(reasoning_tokens or 0) + self._hourly[hour_key]["cached_tokens"] += int(cached_tokens or 0) # 天统计 self._daily[day_key]["total"] += 1 @@ -118,6 +168,11 @@ async def record_request(self, model: str, success: bool) -> None: self._daily[day_key]["success"] += 1 else: self._daily[day_key]["failed"] += 1 + self._daily[day_key]["total_tokens"] += int(total_tokens or 0) + self._daily[day_key]["input_tokens"] += int(input_tokens or 0) + self._daily[day_key]["output_tokens"] += int(output_tokens or 0) + self._daily[day_key]["reasoning_tokens"] += int(reasoning_tokens or 0) + self._daily[day_key]["cached_tokens"] += int(cached_tokens or 0) # 模型统计 self._models[model] += 1 @@ -154,7 +209,16 @@ def get_stats(self, hours: int = 24, days: int = 7) -> Dict[str, Any]: from datetime import timedelta dt = now - timedelta(hours=i) key = dt.strftime("%Y-%m-%dT%H") - data = self._hourly.get(key, {"total": 0, "success": 0, "failed": 0}) + data = self._hourly.get(key, { + "total": 0, + "success": 0, + "failed": 0, + "total_tokens": 0, + "input_tokens": 0, + "output_tokens": 0, + "reasoning_tokens": 0, + "cached_tokens": 0, + }) hourly_data.append({ "hour": dt.strftime("%H:00"), "date": dt.strftime("%m-%d"), @@ -167,7 +231,16 @@ def get_stats(self, hours: int = 24, days: int = 7) -> Dict[str, Any]: from datetime import timedelta dt = now - timedelta(days=i) key = dt.strftime("%Y-%m-%d") - data = self._daily.get(key, {"total": 0, "success": 0, "failed": 0}) + data = self._daily.get(key, { + "total": 0, + "success": 0, + "failed": 0, + "total_tokens": 0, + "input_tokens": 0, + "output_tokens": 0, + "reasoning_tokens": 0, + "cached_tokens": 0, + }) daily_data.append({ "date": dt.strftime("%m-%d"), **data @@ -180,6 +253,11 @@ def get_stats(self, hours: int = 24, days: int = 7) -> Dict[str, Any]: total_requests = sum(d["total"] for d in self._hourly.values()) total_success = sum(d["success"] for d in self._hourly.values()) total_failed = sum(d["failed"] for d in self._hourly.values()) + total_tokens = sum(int(d.get("total_tokens", 0) or 0) for d in self._hourly.values()) + input_tokens = sum(int(d.get("input_tokens", 0) or 0) for d in self._hourly.values()) + output_tokens = sum(int(d.get("output_tokens", 0) or 0) for d in self._hourly.values()) + 
reasoning_tokens = sum(int(d.get("reasoning_tokens", 0) or 0) for d in self._hourly.values()) + cached_tokens = sum(int(d.get("cached_tokens", 0) or 0) for d in self._hourly.values()) return { "hourly": hourly_data, @@ -189,7 +267,12 @@ def get_stats(self, hours: int = 24, days: int = 7) -> Dict[str, Any]: "total": total_requests, "success": total_success, "failed": total_failed, - "success_rate": round(total_success / total_requests * 100, 1) if total_requests > 0 else 0 + "success_rate": round(total_success / total_requests * 100, 1) if total_requests > 0 else 0, + "total_tokens": total_tokens, + "input_tokens": input_tokens, + "output_tokens": output_tokens, + "reasoning_tokens": reasoning_tokens, + "cached_tokens": cached_tokens, } } diff --git a/app/services/token_usage.py b/app/services/token_usage.py new file mode 100644 index 0000000..fc91651 --- /dev/null +++ b/app/services/token_usage.py @@ -0,0 +1,110 @@ +"""Token usage estimation utilities (no external tokenizer).""" + +from __future__ import annotations + +from typing import Any, Dict, List, Tuple + + +def _count_tokens(text: str) -> int: + if not text: + return 0 + ascii_count = 0 + non_ascii = 0 + for ch in text: + if ord(ch) <= 0x7F: + ascii_count += 1 + else: + non_ascii += 1 + return (ascii_count + 3) // 4 + non_ascii + + +def _split_think(text: str) -> Tuple[str, str]: + if not text: + return "", "" + reasoning_parts: List[str] = [] + output = text + start = 0 + while True: + s = output.find("", start) + if s < 0: + break + e = output.find("", s + 7) + if e < 0: + break + reasoning_parts.append(output[s + 7 : e]) + output = output[:s] + output[e + 8 :] + start = s + return "\n".join(reasoning_parts), output + + +def estimate_prompt_tokens(messages: List[Dict[str, Any]]) -> Dict[str, int]: + text_parts: List[str] = [] + image_tokens = 0 + for msg in messages or []: + content = msg.get("content") + if isinstance(content, list): + for item in content: + if isinstance(item, dict) and item.get("type") == "text": + t = str(item.get("text") or "") + if t.strip(): + text_parts.append(t) + else: + t = str(content or "") + if t.strip(): + text_parts.append(t) + text_tokens = _count_tokens("\n".join(text_parts)) + return { + "text_tokens": text_tokens, + "image_tokens": image_tokens, + "prompt_tokens": text_tokens + image_tokens, + } + + +def build_chat_usage(messages: List[Dict[str, Any]], completion_text: str) -> Dict[str, Any]: + prompt = estimate_prompt_tokens(messages) + reasoning_text, output_text = _split_think(completion_text or "") + completion_text_tokens = _count_tokens(output_text) + reasoning_tokens = _count_tokens(reasoning_text) + output_tokens = completion_text_tokens + reasoning_tokens + input_tokens = prompt["prompt_tokens"] + total_tokens = input_tokens + output_tokens + return { + "prompt_tokens": input_tokens, + "completion_tokens": output_tokens, + "total_tokens": total_tokens, + "prompt_tokens_details": { + "cached_tokens": 0, + "text_tokens": prompt["text_tokens"], + "audio_tokens": 0, + "image_tokens": prompt["image_tokens"], + }, + "completion_tokens_details": { + "text_tokens": completion_text_tokens, + "audio_tokens": 0, + "reasoning_tokens": reasoning_tokens, + }, + "_raw": { + "reasoning_tokens": reasoning_tokens, + "cached_tokens": 0, + "input_tokens": input_tokens, + "output_tokens": output_tokens, + }, + } + + +def build_image_usage(prompt: str, success_count: int = 1) -> Dict[str, Any]: + text_tokens = _count_tokens(str(prompt or "")) + success = max(1, int(success_count or 1)) + total_tokens = 
text_tokens * success + return { + "total_tokens": total_tokens, + "input_tokens": total_tokens, + "output_tokens": 0, + "input_tokens_details": {"text_tokens": text_tokens, "image_tokens": 0}, + "_raw": { + "reasoning_tokens": 0, + "cached_tokens": 0, + "input_tokens": total_tokens, + "output_tokens": 0, + }, + } diff --git a/migrations/0001_init.sql b/migrations/0001_init.sql index fa959e9..c706bbd 100644 --- a/migrations/0001_init.sql +++ b/migrations/0001_init.sql @@ -51,6 +51,11 @@ CREATE TABLE IF NOT EXISTS request_logs ( status INTEGER NOT NULL, key_name TEXT NOT NULL, token_suffix TEXT NOT NULL, + total_tokens INTEGER NOT NULL DEFAULT 0, + input_tokens INTEGER NOT NULL DEFAULT 0, + output_tokens INTEGER NOT NULL DEFAULT 0, + reasoning_tokens INTEGER NOT NULL DEFAULT 0, + cached_tokens INTEGER NOT NULL DEFAULT 0, error TEXT NOT NULL ); diff --git a/migrations/0006_request_logs_usage.sql b/migrations/0006_request_logs_usage.sql new file mode 100644 index 0000000..7466432 --- /dev/null +++ b/migrations/0006_request_logs_usage.sql @@ -0,0 +1,7 @@ +-- Add token usage columns to request_logs + +ALTER TABLE request_logs ADD COLUMN total_tokens INTEGER NOT NULL DEFAULT 0; +ALTER TABLE request_logs ADD COLUMN input_tokens INTEGER NOT NULL DEFAULT 0; +ALTER TABLE request_logs ADD COLUMN output_tokens INTEGER NOT NULL DEFAULT 0; +ALTER TABLE request_logs ADD COLUMN reasoning_tokens INTEGER NOT NULL DEFAULT 0; +ALTER TABLE request_logs ADD COLUMN cached_tokens INTEGER NOT NULL DEFAULT 0; \ No newline at end of file diff --git a/src/grok/processor.ts b/src/grok/processor.ts index f0cfb83..c6de5d2 100644 --- a/src/grok/processor.ts +++ b/src/grok/processor.ts @@ -1,4 +1,5 @@ import type { GrokSettings, GlobalSettings } from "../settings"; +import { buildChatUsageFromTexts, estimateInputTokensFromMessages, estimateTokens } from "../utils/token_usage"; type GrokNdjson = Record; @@ -121,6 +122,7 @@ export function createOpenAiStreamFromGrokNdjson( settings: GrokSettings; global: GlobalSettings; origin: string; + promptMessages?: Array<{ content?: unknown }>; onFinish?: (result: { status: number; duration: number }) => Promise | void; }, ): ReadableStream { @@ -141,6 +143,9 @@ export function createOpenAiStreamFromGrokNdjson( const chunkTimeoutMs = Math.max(0, (settings.stream_chunk_timeout ?? 120) * 1000); const totalTimeoutMs = Math.max(0, (settings.stream_total_timeout ?? 600) * 1000); + const promptEst = estimateInputTokensFromMessages(opts.promptMessages ?? 
[]); + let completionText = ""; + return new ReadableStream({ async start(controller) { const body = grokResp.body; @@ -372,11 +377,46 @@ export function createOpenAiStreamFromGrokNdjson( shouldSkip = true; } - if (!shouldSkip) controller.enqueue(encoder.encode(makeChunk(id, created, currentModel, content))); + if (!shouldSkip) { + completionText += content; + controller.enqueue(encoder.encode(makeChunk(id, created, currentModel, content))); + } isThinking = currentIsThinking; } } + const usage = buildChatUsageFromTexts({ + promptTextTokens: promptEst.textTokens, + promptImageTokens: promptEst.imageTokens, + completionText, + }); + controller.enqueue( + encoder.encode( + `data: ${JSON.stringify({ + id, + object: "chat.completion.chunk", + created, + model: currentModel, + choices: [{ index: 0, delta: {}, finish_reason: "stop" }], + usage: { + prompt_tokens: usage.input_tokens, + completion_tokens: usage.output_tokens, + total_tokens: usage.total_tokens, + prompt_tokens_details: { + cached_tokens: usage.cached_tokens, + text_tokens: usage.input_tokens_details.text_tokens, + audio_tokens: 0, + image_tokens: usage.input_tokens_details.image_tokens, + }, + completion_tokens_details: { + text_tokens: usage.output_tokens_details.text_tokens, + audio_tokens: 0, + reasoning_tokens: usage.reasoning_tokens, + }, + }, + })}\n\n`, + ), + ); controller.enqueue(encoder.encode(makeChunk(id, created, currentModel, "", "stop"))); controller.enqueue(encoder.encode(makeDone())); if (opts.onFinish) await opts.onFinish({ status: finalStatus, duration: (Date.now() - startTime) / 1000 }); @@ -404,7 +444,14 @@ export function createOpenAiStreamFromGrokNdjson( export async function parseOpenAiFromGrokNdjson( grokResp: Response, - opts: { cookie: string; settings: GrokSettings; global: GlobalSettings; origin: string; requestedModel: string }, + opts: { + cookie: string; + settings: GrokSettings; + global: GlobalSettings; + origin: string; + requestedModel: string; + promptMessages?: Array<{ content?: unknown }>; + }, ): Promise> { const { global, origin, requestedModel, settings } = opts; const text = await grokResp.text(); @@ -471,6 +518,13 @@ export async function parseOpenAiFromGrokNdjson( break; } + const promptEst = estimateInputTokensFromMessages(opts.promptMessages ?? 
[]); + const usage = buildChatUsageFromTexts({ + promptTextTokens: promptEst.textTokens, + promptImageTokens: promptEst.imageTokens, + completionText: content, + }); + return { id: `chatcmpl-${crypto.randomUUID()}`, object: "chat.completion", @@ -483,6 +537,21 @@ export async function parseOpenAiFromGrokNdjson( finish_reason: "stop", }, ], - usage: null, + usage: { + prompt_tokens: usage.input_tokens, + completion_tokens: usage.output_tokens, + total_tokens: usage.total_tokens, + prompt_tokens_details: { + cached_tokens: usage.cached_tokens, + text_tokens: usage.input_tokens_details.text_tokens, + audio_tokens: 0, + image_tokens: usage.input_tokens_details.image_tokens, + }, + completion_tokens_details: { + text_tokens: usage.output_tokens_details.text_tokens, + audio_tokens: 0, + reasoning_tokens: usage.reasoning_tokens, + }, + }, }; } diff --git a/src/repo/logs.ts b/src/repo/logs.ts index 08e1502..3e7d9a8 100644 --- a/src/repo/logs.ts +++ b/src/repo/logs.ts @@ -12,6 +12,11 @@ export interface RequestLogRow { status: number; key_name: string; token_suffix: string; + total_tokens: number; + input_tokens: number; + output_tokens: number; + reasoning_tokens: number; + cached_tokens: number; error: string; } @@ -24,7 +29,7 @@ export async function addRequestLog( const time = formatUtcMs(ts); await dbRun( db, - "INSERT INTO request_logs(id,time,timestamp,ip,model,duration,status,key_name,token_suffix,error) VALUES(?,?,?,?,?,?,?,?,?,?)", + "INSERT INTO request_logs(id,time,timestamp,ip,model,duration,status,key_name,token_suffix,total_tokens,input_tokens,output_tokens,reasoning_tokens,cached_tokens,error) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)", [ id, time, @@ -35,6 +40,11 @@ export async function addRequestLog( entry.status, entry.key_name, entry.token_suffix, + (entry as any).total_tokens ?? 0, + (entry as any).input_tokens ?? 0, + (entry as any).output_tokens ?? 0, + (entry as any).reasoning_tokens ?? 0, + (entry as any).cached_tokens ?? 0, entry.error, ], ); @@ -43,7 +53,7 @@ export async function addRequestLog( export async function getRequestLogs(db: Env["DB"], limit = 1000): Promise { return dbAll( db, - "SELECT id,time,timestamp,ip,model,duration,status,key_name,token_suffix,error FROM request_logs ORDER BY timestamp DESC LIMIT ?", + "SELECT id,time,timestamp,ip,model,duration,status,key_name,token_suffix,total_tokens,input_tokens,output_tokens,reasoning_tokens,cached_tokens,error FROM request_logs ORDER BY timestamp DESC LIMIT ?", [limit], ); } @@ -56,7 +66,17 @@ export interface RequestStats { hourly: Array<{ hour: string; success: number; failed: number }>; daily: Array<{ date: string; success: number; failed: number }>; models: Array<{ model: string; count: number }>; - summary: { total: number; success: number; failed: number; success_rate: number }; + summary: { + total: number; + success: number; + failed: number; + success_rate: number; + total_tokens: number; + input_tokens: number; + output_tokens: number; + reasoning_tokens: number; + cached_tokens: number; + }; } function isSuccessStatus(status: number): boolean { @@ -152,5 +172,32 @@ export async function getRequestStats(db: Env["DB"]): Promise { const total = success + failed; const success_rate = total > 0 ? 
Math.round((success / total) * 1000) / 10 : 0; - return { hourly, daily, models, summary: { total, success, failed, success_rate } }; + const tokenSumRow = await dbFirst<{ + total_tokens: number; + input_tokens: number; + output_tokens: number; + reasoning_tokens: number; + cached_tokens: number; + }>( + db, + "SELECT SUM(total_tokens) as total_tokens, SUM(input_tokens) as input_tokens, SUM(output_tokens) as output_tokens, SUM(reasoning_tokens) as reasoning_tokens, SUM(cached_tokens) as cached_tokens FROM request_logs WHERE timestamp >= ?", + [since24h], + ); + + return { + hourly, + daily, + models, + summary: { + total, + success, + failed, + success_rate, + total_tokens: tokenSumRow?.total_tokens ?? 0, + input_tokens: tokenSumRow?.input_tokens ?? 0, + output_tokens: tokenSumRow?.output_tokens ?? 0, + reasoning_tokens: tokenSumRow?.reasoning_tokens ?? 0, + cached_tokens: tokenSumRow?.cached_tokens ?? 0, + }, + }; } diff --git a/src/routes/openai.ts b/src/routes/openai.ts index b075b15..df6f89f 100644 --- a/src/routes/openai.ts +++ b/src/routes/openai.ts @@ -9,6 +9,7 @@ import { uploadImage } from "../grok/upload"; import { getDynamicHeaders } from "../grok/headers"; import { createMediaPost, createPost } from "../grok/create"; import { createOpenAiStreamFromGrokNdjson, parseOpenAiFromGrokNdjson } from "../grok/processor"; +import { buildChatUsageFromTexts, buildImageUsageFromPrompt, estimateInputTokensFromMessages, estimateTokens } from "../utils/token_usage"; import { IMAGE_METHOD_IMAGINE_WS_EXPERIMENTAL, generateImagineWs, @@ -360,6 +361,7 @@ function createImageEventStream(args: { cookie: string; settings: Awaited>["grok"]; n: number; + prompt: string; onFinish?: (result: { status: number; duration: number }) => Promise | void; }): ReadableStream { const encoder = new TextEncoder(); @@ -446,6 +448,7 @@ function createImageEventStream(args: { } } + const usageBase = buildImageUsageFromPrompt(args.prompt); for (let i = 0; i < finalImages.length; i++) { if (args.n === 1 && i !== targetIndex) continue; const outIndex = args.n === 1 ? 0 : i; @@ -456,10 +459,10 @@ function createImageEventStream(args: { [responseField]: finalImages[i] ?? "", index: outIndex, usage: { - total_tokens: 50, - input_tokens: 25, - output_tokens: 25, - input_tokens_details: { text_tokens: 5, image_tokens: 20 }, + total_tokens: usageBase.total_tokens, + input_tokens: usageBase.input_tokens, + output_tokens: usageBase.output_tokens, + input_tokens_details: usageBase.input_tokens_details, }, }), ), @@ -495,19 +498,6 @@ function getTokenSuffix(token: string): string { return token.length >= 6 ? token.slice(-6) : token; } -const IMAGE_GENERATION_MODEL_ID = "grok-imagine-1.0"; -const IMAGE_EDIT_MODEL_ID = "grok-imagine-1.0-edit"; - -function parseImageCount(input: unknown): number { - const raw = Number(input ?? 1); - if (!Number.isFinite(raw)) return 1; - return Math.max(1, Math.min(10, Math.floor(raw))); -} - -function parseImagePrompt(input: unknown): string { - return String(input ?? "").trim(); -} - function parseImageModel(input: unknown, fallback: string): string { return String(input ?? fallback).trim() || fallback; } @@ -516,10 +506,6 @@ function parseImageStream(input: unknown): boolean { return toBool(input); } -function parseImageSize(input: unknown): string { - return String(input ?? 
"1024x1024").trim() || "1024x1024"; -} - function parseImageConcurrencyOrError( input: unknown, ): { value: number } | { error: { message: string; code: string } } { @@ -702,6 +688,7 @@ async function runExperimentalImageEditCall(args: { function createSyntheticImageEventStream(args: { selected: string[]; responseField: ImageResponseFormat; + prompt: string; onFinish?: (result: { status: number; duration: number }) => Promise | void; }): ReadableStream { const encoder = new TextEncoder(); @@ -711,11 +698,11 @@ function createSyntheticImageEventStream(args: { const startedAt = Date.now(); try { let emitted = false; + const usageBase = buildImageUsageFromPrompt(args.prompt); for (let i = 0; i < args.selected.length; i++) { const value = args.selected[i]; if (!value || value === "error") continue; emitted = true; - controller.enqueue( encoder.encode( buildImageSse("image_generation.partial_image", { @@ -733,10 +720,10 @@ function createSyntheticImageEventStream(args: { [args.responseField]: value, index: i, usage: { - total_tokens: 50, - input_tokens: 25, - output_tokens: 25, - input_tokens_details: { text_tokens: 5, image_tokens: 20 }, + total_tokens: usageBase.total_tokens, + input_tokens: usageBase.input_tokens, + output_tokens: usageBase.output_tokens, + input_tokens_details: usageBase.input_tokens_details, }, }), ), @@ -778,6 +765,7 @@ function createSyntheticImageEventStream(args: { function createStreamErrorImageEventStream(args: { message: string; responseField: ImageResponseFormat; + prompt: string; onFinish?: (result: { status: number; duration: number }) => Promise | void; }): ReadableStream { const encoder = new TextEncoder(); @@ -793,6 +781,7 @@ function createStreamErrorImageEventStream(args: { }), ), ); + const usageBase = buildImageUsageFromPrompt(args.prompt); controller.enqueue( encoder.encode( buildImageSse("image_generation.completed", { @@ -803,7 +792,7 @@ function createStreamErrorImageEventStream(args: { total_tokens: 0, input_tokens: 0, output_tokens: 0, - input_tokens_details: { text_tokens: 0, image_tokens: 0 }, + input_tokens_details: usageBase.input_tokens_details, }, }), ), @@ -858,6 +847,7 @@ function createExperimentalImageEventStream(args: { ); }; + const usageBase = buildImageUsageFromPrompt(args.prompt); const emitCompleted = (index: number, value: string) => { if (index < 0 || index >= safeN) return; if (completedByIndex.has(index)) return; @@ -871,12 +861,12 @@ function createExperimentalImageEventStream(args: { [args.responseField]: finalValue, index, usage: { - total_tokens: isError ? 0 : 50, - input_tokens: isError ? 0 : 25, - output_tokens: isError ? 0 : 25, + total_tokens: isError ? 0 : usageBase.total_tokens, + input_tokens: isError ? 0 : usageBase.input_tokens, + output_tokens: isError ? 0 : usageBase.output_tokens, input_tokens_details: { - text_tokens: isError ? 0 : 5, - image_tokens: isError ? 0 : 20, + text_tokens: isError ? 0 : usageBase.input_tokens_details.text_tokens, + image_tokens: isError ? 
0 : usageBase.input_tokens_details.image_tokens, }, }, }), @@ -1026,12 +1016,14 @@ function invalidStreamNMessage(): string { return "Streaming is only supported when n=1 or n=2"; } -function imageUsagePayload(values: string[]) { +function imageUsagePayload(values: string[], prompt: string) { + const base = buildImageUsageFromPrompt(prompt); + const successCount = values.filter((v) => v !== "error").length; return { - total_tokens: 0 * values.filter((v) => v !== "error").length, - input_tokens: 0, - output_tokens: 0 * values.filter((v) => v !== "error").length, - input_tokens_details: { text_tokens: 0, image_tokens: 0 }, + total_tokens: base.total_tokens * Math.max(1, successCount), + input_tokens: base.input_tokens * Math.max(1, successCount), + output_tokens: base.output_tokens * Math.max(1, successCount), + input_tokens_details: base.input_tokens_details, }; } @@ -1039,11 +1031,11 @@ function createdTs(): number { return Math.floor(Date.now() / 1000); } -function buildImageJsonPayload(field: ImageResponseFormat, values: string[]) { +function buildImageJsonPayload(field: ImageResponseFormat, values: string[], prompt: string) { return { created: createdTs(), data: imageResponseData(field, values), - usage: imageUsagePayload(values), + usage: imageUsagePayload(values, prompt), }; } @@ -1056,8 +1048,10 @@ async function recordImageLog(args: { status: number; tokenSuffix?: string; error: string; + prompt?: string; }) { const duration = (Date.now() - args.start) / 1000; + const usage = args.prompt ? buildImageUsageFromPrompt(args.prompt) : null; await addRequestLog(args.env.DB, { ip: args.ip, model: args.model, @@ -1065,6 +1059,11 @@ async function recordImageLog(args: { status: args.status, key_name: args.keyName, token_suffix: args.tokenSuffix ?? "", + total_tokens: usage?.total_tokens ?? 0, + input_tokens: usage?.input_tokens ?? 0, + output_tokens: usage?.output_tokens ?? 0, + reasoning_tokens: 0, + cached_tokens: 0, error: args.error, }); } @@ -1250,6 +1249,7 @@ openAiRoutes.post("/chat/completions", async (c) => { const { content, images } = extractContent(body.messages as any); const isVideoModel = Boolean(cfg.is_video_model); const imgInputs = isVideoModel && images.length > 1 ? images.slice(0, 1) : images; + const promptMessages = body.messages as Array<{ content?: unknown }>; try { const uploads = await mapLimit(imgInputs, 5, (u) => uploadImage(u, cookie, settingsBundle.grok)); @@ -1303,7 +1303,9 @@ openAiRoutes.post("/chat/completions", async (c) => { settings: settingsBundle.grok, global: settingsBundle.global, origin, + promptMessages, onFinish: async ({ status, duration }) => { + const promptEst = estimateInputTokensFromMessages(promptMessages); await addRequestLog(c.env.DB, { ip, model: requestedModel, @@ -1311,6 +1313,11 @@ openAiRoutes.post("/chat/completions", async (c) => { status, key_name: keyName, token_suffix: jwt.slice(-6), + total_tokens: promptEst.promptTokens, + input_tokens: promptEst.promptTokens, + output_tokens: 0, + reasoning_tokens: 0, + cached_tokens: 0, error: status === 200 ? "" : "stream_error", }); }, @@ -1334,9 +1341,12 @@ openAiRoutes.post("/chat/completions", async (c) => { global: settingsBundle.global, origin, requestedModel, + promptMessages, }); const duration = (Date.now() - start) / 1000; + const usage = (json as any).usage as any; + const raw = usage?.completion_tokens_details ? 
usage : null; await addRequestLog(c.env.DB, { ip, model: requestedModel, @@ -1344,6 +1354,11 @@ openAiRoutes.post("/chat/completions", async (c) => { status: 200, key_name: keyName, token_suffix: jwt.slice(-6), + total_tokens: Number(raw?.total_tokens ?? 0), + input_tokens: Number(raw?.prompt_tokens ?? 0), + output_tokens: Number(raw?.completion_tokens ?? 0), + reasoning_tokens: Number(raw?.completion_tokens_details?.reasoning_tokens ?? 0), + cached_tokens: Number(raw?.prompt_tokens_details?.cached_tokens ?? 0), error: "", }); @@ -1365,6 +1380,11 @@ openAiRoutes.post("/chat/completions", async (c) => { status: 500, key_name: keyName, token_suffix: "", + total_tokens: 0, + input_tokens: 0, + output_tokens: 0, + reasoning_tokens: 0, + cached_tokens: 0, error: lastErr ?? "unknown_error", }); @@ -1378,6 +1398,11 @@ openAiRoutes.post("/chat/completions", async (c) => { status: 500, key_name: keyName, token_suffix: "", + total_tokens: 0, + input_tokens: 0, + output_tokens: 0, + reasoning_tokens: 0, + cached_tokens: 0, error: e instanceof Error ? e.message : String(e), }); return c.json(openAiError("Internal error", "internal_error"), 500); @@ -1493,11 +1518,13 @@ openAiRoutes.post("/images/generations", async (c) => { keyName, status: 503, error: "NO_AVAILABLE_TOKEN", + prompt: imageCallPrompt("generation", prompt), }); return new Response( createStreamErrorImageEventStream({ message: "No available token", responseField, + prompt: imageCallPrompt("generation", prompt), }), { status: 200, headers: streamHeaders() }, ); @@ -1524,6 +1551,7 @@ openAiRoutes.post("/images/generations", async (c) => { status: upstream.status, tokenSuffix: getTokenSuffix(chosen.token), error: txt.slice(0, 200), + prompt: imageCallPrompt("generation", prompt), }); return new Response( createStreamErrorImageEventStream({ @@ -1531,6 +1559,7 @@ openAiRoutes.post("/images/generations", async (c) => { ? txt.slice(0, 500) : `Upstream ${upstream.status}`, responseField, + prompt: imageCallPrompt("generation", prompt), }), { status: 200, headers: streamHeaders() }, ); @@ -1543,6 +1572,7 @@ openAiRoutes.post("/images/generations", async (c) => { cookie, settings: settingsBundle.grok, n, + prompt: imageCallPrompt("generation", prompt), onFinish: async ({ status, duration }) => { await addRequestLog(c.env.DB, { ip, @@ -1583,8 +1613,9 @@ openAiRoutes.post("/images/generations", async (c) => { status: 200, tokenSuffix: getTokenSuffix(experimentalToken.token), error: "", + prompt: imageCallPrompt("generation", prompt), }); - return c.json(buildImageJsonPayload(responseField, selected)); + return c.json(buildImageJsonPayload(responseField, selected, imageCallPrompt("generation", prompt))); } catch (e) { const msg = e instanceof Error ? e.message : String(e); await recordTokenFailure(c.env.DB, experimentalToken.token, 500, msg.slice(0, 200)); @@ -1599,26 +1630,26 @@ openAiRoutes.post("/images/generations", async (c) => { Array.from({ length: calls }), Math.min(calls, Math.max(1, concurrency)), async () => { - const chosen = await selectBestToken(c.env.DB, requestedModel); - if (!chosen) throw new Error("No available token"); - const cookie = buildCookie(chosen.token, cf); - try { - return await runImageCall({ - requestModel: requestedModel, - prompt: imageCallPrompt("generation", prompt), - fileIds: [], - cookie, - settings: settingsBundle.grok, - responseFormat, - baseUrl, - }); - } catch (e) { - const msg = e instanceof Error ? 
e.message : String(e); - await recordTokenFailure(c.env.DB, chosen.token, 500, msg.slice(0, 200)); - await applyCooldown(c.env.DB, chosen.token, 500); - throw e; - } - }, + const chosen = await selectBestToken(c.env.DB, requestedModel); + if (!chosen) throw new Error("No available token"); + const cookie = buildCookie(chosen.token, cf); + try { + return await runImageCall({ + requestModel: requestedModel, + prompt: imageCallPrompt("generation", prompt), + fileIds: [], + cookie, + settings: settingsBundle.grok, + responseFormat, + baseUrl, + }); + } catch (e) { + const msg = e instanceof Error ? e.message : String(e); + await recordTokenFailure(c.env.DB, chosen.token, 500, msg.slice(0, 200)); + await applyCooldown(c.env.DB, chosen.token, 500); + throw e; + } + }, ); const urls = dedupeImages(urlsNested.flat().filter(Boolean)); const selected = pickImageResults(urls, n); @@ -1631,9 +1662,10 @@ openAiRoutes.post("/images/generations", async (c) => { keyName, status: 200, error: "", + prompt: imageCallPrompt("generation", prompt), }); - return c.json(buildImageJsonPayload(responseField, selected)); + return c.json(buildImageJsonPayload(responseField, selected, imageCallPrompt("generation", prompt))); } catch (e) { const message = e instanceof Error ? e.message : String(e); if (isContentModerationMessage(message)) { @@ -1645,6 +1677,7 @@ openAiRoutes.post("/images/generations", async (c) => { keyName, status: 400, error: message, + prompt: imageCallPrompt("generation", prompt), }); return c.json(openAiError(message, "content_policy_violation"), 400); } @@ -1656,6 +1689,7 @@ openAiRoutes.post("/images/generations", async (c) => { keyName, status: 500, error: message, + prompt: imageCallPrompt("generation", prompt), }); return c.json(openAiError(message || "Internal error", "internal_error"), 500); } @@ -1728,11 +1762,13 @@ openAiRoutes.post("/images/edits", async (c) => { keyName, status: 503, error: "NO_AVAILABLE_TOKEN", + prompt: imageCallPrompt("edit", prompt), }); return new Response( createStreamErrorImageEventStream({ message: "No available token", responseField, + prompt: imageCallPrompt("edit", prompt), }), { status: 200, headers: streamHeaders() }, ); @@ -1825,6 +1861,7 @@ openAiRoutes.post("/images/edits", async (c) => { status: upstream.status, tokenSuffix: getTokenSuffix(chosen.token), error: txt.slice(0, 200), + prompt: imageCallPrompt("edit", prompt), }); return new Response( createStreamErrorImageEventStream({ @@ -1832,6 +1869,7 @@ openAiRoutes.post("/images/edits", async (c) => { ? txt.slice(0, 500) : `Upstream ${upstream.status}`, responseField, + prompt: imageCallPrompt("edit", prompt), }), { status: 200, headers: streamHeaders() }, ); @@ -1844,6 +1882,7 @@ openAiRoutes.post("/images/edits", async (c) => { cookie, settings: settingsBundle.grok, n, + prompt: imageCallPrompt("edit", prompt), onFinish: async ({ status, duration }) => { await addRequestLog(c.env.DB, { ip, @@ -1885,8 +1924,9 @@ openAiRoutes.post("/images/edits", async (c) => { status: 200, tokenSuffix: getTokenSuffix(chosen.token), error: "", + prompt: imageCallPrompt("edit", prompt), }); - return c.json(buildImageJsonPayload(responseField, selected)); + return c.json(buildImageJsonPayload(responseField, selected, imageCallPrompt("edit", prompt))); } catch (e) { const msg = e instanceof Error ? 
e.message : String(e); await recordTokenFailure(c.env.DB, chosen.token, 500, msg.slice(0, 200)); @@ -1919,9 +1959,10 @@ openAiRoutes.post("/images/edits", async (c) => { status: 200, tokenSuffix: getTokenSuffix(chosen.token), error: "", + prompt: imageCallPrompt("edit", prompt), }); - return c.json(buildImageJsonPayload(responseField, selected)); + return c.json(buildImageJsonPayload(responseField, selected, imageCallPrompt("edit", prompt))); } catch (e) { const message = e instanceof Error ? e.message : String(e); if (isContentModerationMessage(message)) { @@ -1933,6 +1974,7 @@ openAiRoutes.post("/images/edits", async (c) => { keyName, status: 400, error: message, + prompt: imageCallPrompt("edit", prompt), }); return c.json(openAiError(message, "content_policy_violation"), 400); } @@ -1944,6 +1986,7 @@ openAiRoutes.post("/images/edits", async (c) => { keyName, status: 500, error: message, + prompt: imageCallPrompt("edit", prompt), }); return c.json(openAiError(message || "Internal error", "internal_error"), 500); } diff --git a/src/utils/token_usage.ts b/src/utils/token_usage.ts new file mode 100644 index 0000000..4d61d55 --- /dev/null +++ b/src/utils/token_usage.ts @@ -0,0 +1,93 @@ +export interface TokenUsageCounts { + total_tokens: number; + input_tokens: number; + output_tokens: number; + reasoning_tokens: number; + cached_tokens: number; + input_tokens_details: { text_tokens: number; image_tokens: number }; + output_tokens_details: { text_tokens: number; reasoning_tokens: number }; +} + +export function estimateTokens(text: string): number { + const raw = String(text || ""); + if (!raw) return 0; + let ascii = 0; + let nonAscii = 0; + for (const ch of raw) { + if (ch.charCodeAt(0) <= 0x7f) ascii += 1; + else nonAscii += 1; + } + const asciiTokens = Math.ceil(ascii / 4); + return asciiTokens + nonAscii; +} + +export function splitThinkSegments(text: string): { reasoningText: string; outputText: string } { + const raw = String(text || ""); + if (!raw) return { reasoningText: "", outputText: "" }; + const regex = /([\s\S]*?)<\/think>/gi; + const reasoningParts: string[] = []; + let output = raw; + let match: RegExpExecArray | null; + while ((match = regex.exec(raw))) { + if (match[1]) reasoningParts.push(match[1]); + } + output = output.replace(regex, ""); + return { reasoningText: reasoningParts.join("\n"), outputText: output }; +} + +export function estimateInputTokensFromMessages(messages: Array<{ content?: unknown }>): { + textTokens: number; + imageTokens: number; + promptTokens: number; +} { + const parts: string[] = []; + let imageTokens = 0; + for (const msg of messages || []) { + const content = (msg as any)?.content; + if (Array.isArray(content)) { + for (const item of content) { + if (item?.type === "text" && typeof item.text === "string") parts.push(item.text); + if (item?.type === "image_url") imageTokens += 0; + } + } else if (typeof content === "string") { + parts.push(content); + } + } + const textTokens = estimateTokens(parts.join("\n")); + return { textTokens, imageTokens, promptTokens: textTokens + imageTokens }; +} + +export function buildChatUsageFromTexts(args: { + promptTextTokens: number; + promptImageTokens: number; + completionText: string; +}): TokenUsageCounts { + const { reasoningText, outputText } = splitThinkSegments(args.completionText); + const completionTextTokens = estimateTokens(outputText); + const reasoningTokens = estimateTokens(reasoningText); + const outputTokens = completionTextTokens + reasoningTokens; + const inputTokens = 
args.promptTextTokens + args.promptImageTokens; + const totalTokens = inputTokens + outputTokens; + return { + total_tokens: totalTokens, + input_tokens: inputTokens, + output_tokens: outputTokens, + reasoning_tokens: reasoningTokens, + cached_tokens: 0, + input_tokens_details: { text_tokens: args.promptTextTokens, image_tokens: args.promptImageTokens }, + output_tokens_details: { text_tokens: completionTextTokens, reasoning_tokens: reasoningTokens }, + }; +} + +export function buildImageUsageFromPrompt(prompt: string): TokenUsageCounts { + const inputTokens = estimateTokens(prompt || ""); + return { + total_tokens: inputTokens, + input_tokens: inputTokens, + output_tokens: 0, + reasoning_tokens: 0, + cached_tokens: 0, + input_tokens_details: { text_tokens: inputTokens, image_tokens: 0 }, + output_tokens_details: { text_tokens: 0, reasoning_tokens: 0 }, + }; +} \ No newline at end of file From bc617cbcaceb409242588ac16cfea17c38e6937b Mon Sep 17 00:00:00 2001 From: ZIC143 <2022220052@email.szu.edu.cn> Date: Sat, 7 Feb 2026 21:07:18 +0800 Subject: [PATCH 2/5] =?UTF-8?q?feat(datacenter):=20add=20token=20metrics?= =?UTF-8?q?=20display=20in=20UI=20feat(=E6=95=B0=E6=8D=AE=E4=B8=AD?= =?UTF-8?q?=E5=BF=83)=EF=BC=9A=E5=9C=A8=20UI=20=E4=B8=AD=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E4=BB=A4=E7=89=8C=E6=8C=87=E6=A0=87=E6=98=BE=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/static/datacenter/datacenter.html | 23 +++++++++++++++++++++++ app/static/datacenter/datacenter.js | 5 +++++ 2 files changed, 28 insertions(+) diff --git a/app/static/datacenter/datacenter.html b/app/static/datacenter/datacenter.html index 6b1fc8d..c0c03d1 100644 --- a/app/static/datacenter/datacenter.html +++ b/app/static/datacenter/datacenter.html @@ -88,6 +88,29 @@

数据中心
[markup lost in extraction: this hunk adds five token metric cards to the 数据中心 dashboard — 总 Tokens (#m-total-tokens), 输入 Tokens (#m-input-tokens), 输出 Tokens (#m-output-tokens), 思考 Tokens (#m-reasoning-tokens), 缓存 Tokens (#m-cached-tokens); element ids recovered from the datacenter.js hunk below]
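A minimal sketch of the kind of markup the datacenter.js bindings below expect. The ids and "-" placeholder values come from this patch's JS hunk and the stripped labels above; the container structure and class names are assumptions, not the original markup:

  <!-- sketch only: ids from datacenter.js; class names are assumed placeholders -->
  <div class="metric-card">
    <div class="metric-value" id="m-total-tokens">-</div>
    <div class="metric-label">总 Tokens</div>
  </div>
  <div class="metric-card">
    <div class="metric-value" id="m-input-tokens">-</div>
    <div class="metric-label">输入 Tokens</div>
  </div>
  <!-- repeat for m-output-tokens (输出 Tokens), m-reasoning-tokens (思考 Tokens), m-cached-tokens (缓存 Tokens) -->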
diff --git a/app/static/datacenter/datacenter.js b/app/static/datacenter/datacenter.js index 2d3087c..383cfc6 100644 --- a/app/static/datacenter/datacenter.js +++ b/app/static/datacenter/datacenter.js @@ -108,6 +108,11 @@ function updateMetricsUI(data) { setText('m-req-success', safeNum(sum.success).toLocaleString()); setText('m-req-failed', safeNum(sum.failed).toLocaleString()); setText('m-success-rate', formatPercent(safeNum(sum.success_rate))); + setText('m-total-tokens', safeNum(sum.total_tokens).toLocaleString()); + setText('m-input-tokens', safeNum(sum.input_tokens).toLocaleString()); + setText('m-output-tokens', safeNum(sum.output_tokens).toLocaleString()); + setText('m-reasoning-tokens', safeNum(sum.reasoning_tokens).toLocaleString()); + setText('m-cached-tokens', safeNum(sum.cached_tokens).toLocaleString()); const cache = data.cache || {}; const li = cache.local_image || { count: 0, size_mb: 0 }; From d0a064c436e3ffd902012ef6207e3deb00d7df69 Mon Sep 17 00:00:00 2001 From: ZIC143 <2022220052@email.szu.edu.cn> Date: Sat, 7 Feb 2026 21:11:01 +0800 Subject: [PATCH 3/5] =?UTF-8?q?feat(chat):=20enhance=20token=20usage=20tra?= =?UTF-8?q?cking=20in=20stream=20processing=20and=20logging=20feat(chat)?= =?UTF-8?q?=EF=BC=9A=E5=A2=9E=E5=BC=BA=E6=B5=81=E5=BC=8F=E5=A4=84=E7=90=86?= =?UTF-8?q?=E5=92=8C=E6=97=A5=E5=BF=97=E4=B8=AD=E7=9A=84=20token=20?= =?UTF-8?q?=E4=BD=BF=E7=94=A8=E9=87=8F=E8=B7=9F=E8=B8=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/services/grok/chat.py | 15 +++++++++++++-- app/services/grok/processor.py | 18 +++++++++++++++++- src/grok/processor.ts | 18 +++++++++++++++--- src/routes/openai.ts | 17 +++++++++++------ 4 files changed, 56 insertions(+), 12 deletions(-) diff --git a/app/services/grok/chat.py b/app/services/grok/chat.py index 471d2fb..e476ceb 100644 --- a/app/services/grok/chat.py +++ b/app/services/grok/chat.py @@ -532,7 +532,8 @@ async def completions( # 处理响应 if is_stream: - processor = StreamProcessor(model_name, token, think).process(response) + stream_processor = StreamProcessor(model_name, token, think) + processor = stream_processor.process(response) prompt_messages = [msg.model_dump() for msg in messages] async def _wrapped_stream(): @@ -545,8 +546,18 @@ async def _wrapped_stream(): # Only count as "success" when the stream ends naturally. 
try: if completed: + usage = stream_processor.build_usage(prompt_messages) + raw = usage.get("_raw") or {} await token_mgr.sync_usage(token, model_name, consume_on_fail=True, is_usage=True) - await request_stats.record_request(model_name, success=True) + await request_stats.record_request( + model_name, + success=True, + total_tokens=int(usage.get("total_tokens", 0) or 0), + input_tokens=int(usage.get("prompt_tokens", 0) or 0), + output_tokens=int(usage.get("completion_tokens", 0) or 0), + reasoning_tokens=int(raw.get("reasoning_tokens", 0) or 0), + cached_tokens=int(raw.get("cached_tokens", 0) or 0), + ) else: await request_stats.record_request(model_name, success=False) except Exception: diff --git a/app/services/grok/processor.py b/app/services/grok/processor.py index f1c8183..3be986a 100644 --- a/app/services/grok/processor.py +++ b/app/services/grok/processor.py @@ -117,6 +117,8 @@ def __init__(self, model: str, token: str = "", think: bool = None): self.fingerprint: str = "" self.think_opened: bool = False self.role_sent: bool = False + self._output_text: str = "" + self._reasoning_text: str = "" self.filter_tags = get_config("grok.filter_tags", []) self.image_format = get_config("app.image_format", "url") @@ -158,6 +160,7 @@ async def process(self, response: AsyncIterable[bytes]) -> AsyncGenerator[str, N idx = img.get('imageIndex', 0) + 1 progress = img.get('progress', 0) yield self._sse(f"正在生成第{idx}张图片中,当前进度{progress}%\n") + self._reasoning_text += f"正在生成第{idx}张图片中,当前进度{progress}%\n" continue # modelResponse @@ -165,6 +168,7 @@ async def process(self, response: AsyncIterable[bytes]) -> AsyncGenerator[str, N if self.think_opened and self.show_think: if msg := mr.get("message"): yield self._sse(msg + "\n") + self._reasoning_text += msg + "\n" yield self._sse("\n") self.think_opened = False @@ -172,18 +176,21 @@ async def process(self, response: AsyncIterable[bytes]) -> AsyncGenerator[str, N for url in mr.get("generatedImageUrls", []): parts = url.split("/") img_id = parts[-2] if len(parts) >= 2 else "image" - + if self.image_format == "base64": dl_service = self._get_dl() base64_data = await dl_service.to_base64(url, self.token, "image") if base64_data: yield self._sse(f"![{img_id}]({base64_data})\n") + self._output_text += f"![{img_id}]({base64_data})\n" else: final_url = await self.process_url(url, "image") yield self._sse(f"![{img_id}]({final_url})\n") + self._output_text += f"![{img_id}]({final_url})\n" else: final_url = await self.process_url(url, "image") yield self._sse(f"![{img_id}]({final_url})\n") + self._output_text += f"![{img_id}]({final_url})\n" if (meta := mr.get("metadata", {})).get("llm_info", {}).get("modelHash"): self.fingerprint = meta["llm_info"]["modelHash"] @@ -193,9 +200,14 @@ async def process(self, response: AsyncIterable[bytes]) -> AsyncGenerator[str, N if (token := resp.get("token")) is not None: if token and not (self.filter_tags and any(t in token for t in self.filter_tags)): yield self._sse(token) + if self.think_opened and self.show_think: + self._reasoning_text += token + else: + self._output_text += token if self.think_opened: yield self._sse("\n") + self.think_opened = False yield self._sse(finish="stop") yield "data: [DONE]\n\n" except Exception as e: @@ -204,6 +216,10 @@ async def process(self, response: AsyncIterable[bytes]) -> AsyncGenerator[str, N finally: await self.close() + def build_usage(self, prompt_messages: Optional[list[dict]] = None) -> dict[str, Any]: + usage = build_chat_usage(prompt_messages or [], (self._output_text + 
self._reasoning_text)) + return usage + class CollectProcessor(BaseProcessor): """非流式响应处理器""" diff --git a/src/grok/processor.ts b/src/grok/processor.ts index c6de5d2..9200010 100644 --- a/src/grok/processor.ts +++ b/src/grok/processor.ts @@ -123,7 +123,11 @@ export function createOpenAiStreamFromGrokNdjson( global: GlobalSettings; origin: string; promptMessages?: Array<{ content?: unknown }>; - onFinish?: (result: { status: number; duration: number }) => Promise | void; + onFinish?: (result: { + status: number; + duration: number; + usage?: ReturnType; + }) => Promise | void; }, ): ReadableStream { const { settings, global, origin } = opts; @@ -419,7 +423,13 @@ export function createOpenAiStreamFromGrokNdjson( ); controller.enqueue(encoder.encode(makeChunk(id, created, currentModel, "", "stop"))); controller.enqueue(encoder.encode(makeDone())); - if (opts.onFinish) await opts.onFinish({ status: finalStatus, duration: (Date.now() - startTime) / 1000 }); + if (opts.onFinish) { + await opts.onFinish({ + status: finalStatus, + duration: (Date.now() - startTime) / 1000, + usage, + }); + } controller.close(); } catch (e) { finalStatus = 500; @@ -429,7 +439,9 @@ export function createOpenAiStreamFromGrokNdjson( ), ); controller.enqueue(encoder.encode(makeDone())); - if (opts.onFinish) await opts.onFinish({ status: finalStatus, duration: (Date.now() - startTime) / 1000 }); + if (opts.onFinish) { + await opts.onFinish({ status: finalStatus, duration: (Date.now() - startTime) / 1000 }); + } controller.close(); } finally { try { diff --git a/src/routes/openai.ts b/src/routes/openai.ts index df6f89f..e3fe307 100644 --- a/src/routes/openai.ts +++ b/src/routes/openai.ts @@ -1304,8 +1304,13 @@ openAiRoutes.post("/chat/completions", async (c) => { global: settingsBundle.global, origin, promptMessages, - onFinish: async ({ status, duration }) => { + onFinish: async ({ status, duration, usage }) => { const promptEst = estimateInputTokensFromMessages(promptMessages); + const resolved = usage ?? buildChatUsageFromTexts({ + promptTextTokens: promptEst.textTokens, + promptImageTokens: promptEst.imageTokens, + completionText: "", + }); await addRequestLog(c.env.DB, { ip, model: requestedModel, @@ -1313,11 +1318,11 @@ openAiRoutes.post("/chat/completions", async (c) => { status, key_name: keyName, token_suffix: jwt.slice(-6), - total_tokens: promptEst.promptTokens, - input_tokens: promptEst.promptTokens, - output_tokens: 0, - reasoning_tokens: 0, - cached_tokens: 0, + total_tokens: resolved.total_tokens, + input_tokens: resolved.input_tokens, + output_tokens: resolved.output_tokens, + reasoning_tokens: resolved.reasoning_tokens, + cached_tokens: resolved.cached_tokens, error: status === 200 ? 
"" : "stream_error", }); }, From 4a8593803b28eba1dc9dab66480bd0864969f980 Mon Sep 17 00:00:00 2001 From: ZIC143 <2022220052@email.szu.edu.cn> Date: Sat, 7 Feb 2026 21:18:06 +0800 Subject: [PATCH 4/5] =?UTF-8?q?feat(logs):=20update=20request=20log=20stru?= =?UTF-8?q?cture=20to=20include=20token=20metrics=20feat(logs):=20?= =?UTF-8?q?=E6=9B=B4=E6=96=B0=E8=AF=B7=E6=B1=82=E6=97=A5=E5=BF=97=E7=BB=93?= =?UTF-8?q?=E6=9E=84=E4=BB=A5=E5=8C=85=E5=90=AB=20token=20=E6=8C=87?= =?UTF-8?q?=E6=A0=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/repo/logs.ts | 9 ++++-- src/routes/admin.ts | 5 +++ src/routes/openai.ts | 75 ++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 81 insertions(+), 8 deletions(-) diff --git a/src/repo/logs.ts b/src/repo/logs.ts index 3e7d9a8..bf3832f 100644 --- a/src/repo/logs.ts +++ b/src/repo/logs.ts @@ -1,5 +1,5 @@ import type { Env } from "../env"; -import { dbAll, dbRun } from "../db"; +import { dbAll, dbFirst, dbRun } from "../db"; import { nowMs, formatUtcMs } from "../utils/time"; export interface RequestLogRow { @@ -20,9 +20,14 @@ export interface RequestLogRow { error: string; } +type RequestLogInsert = Omit & + Partial> & { + id?: string; + }; + export async function addRequestLog( db: Env["DB"], - entry: Omit & { id?: string }, + entry: RequestLogInsert, ): Promise { const ts = nowMs(); const id = entry.id ?? String(ts); diff --git a/src/routes/admin.ts b/src/routes/admin.ts index db194bd..f323e35 100644 --- a/src/routes/admin.ts +++ b/src/routes/admin.ts @@ -1260,6 +1260,11 @@ adminRoutes.post("/api/logs/add", requireAdminAuth, async (c) => { status: Number(body.status ?? 200), key_name: "admin", token_suffix: "", + total_tokens: 0, + input_tokens: 0, + output_tokens: 0, + reasoning_tokens: 0, + cached_tokens: 0, error: String(body.error ?? ""), }); return c.json({ success: true }); diff --git a/src/routes/openai.ts b/src/routes/openai.ts index e3fe307..62a5136 100644 --- a/src/routes/openai.ts +++ b/src/routes/openai.ts @@ -27,6 +27,9 @@ import { nowMs } from "../utils/time"; import { arrayBufferToBase64 } from "../utils/base64"; import { upsertCacheRow } from "../repo/cache"; +const IMAGE_GENERATION_MODEL_ID = "grok-imagine-1.0"; +const IMAGE_EDIT_MODEL_ID = "grok-imagine-1.0-edit"; + function openAiError(message: string, code: string): Record { return { error: { message, type: "invalid_request_error", code } }; } @@ -502,6 +505,43 @@ function parseImageModel(input: unknown, fallback: string): string { return String(input ?? fallback).trim() || fallback; } +function parseImagePrompt(input: unknown): string { + if (input === undefined || input === null) return ""; + if (typeof input === "string") return input.trim(); + if (Array.isArray(input)) return input.map((v) => String(v ?? "")).join(" ").trim(); + return String(input).trim(); +} + +function parseImageCount(input: unknown): number { + const raw = Number(input); + if (!Number.isFinite(raw)) return 1; + const value = Math.floor(raw); + return Math.max(1, Math.min(10, value)); +} + +function parseImageSize(input: unknown): string { + const value = String(input ?? "").trim().toLowerCase(); + if (!value) return "1024x1024"; + const allowed = new Set([ + "256x256", + "512x512", + "1024x1024", + "1024x576", + "1280x720", + "1536x864", + "576x1024", + "720x1280", + "864x1536", + "1024x1536", + "512x768", + "768x1024", + "1536x1024", + "768x512", + "1024x768", + ]); + return allowed.has(value) ? 
@@ -1420,6 +1460,7 @@ openAiRoutes.post("/images/generations", async (c) => {
   const keyName = c.get("apiAuth").name ?? "Unknown";
   const origin = new URL(c.req.url).origin;
 
+  let prompt = "";
   let requestedModel = IMAGE_GENERATION_MODEL_ID;
   try {
     const body = (await c.req.json()) as {
@@ -1431,7 +1472,7 @@ openAiRoutes.post("/images/generations", async (c) => {
       stream?: unknown;
       response_format?: unknown;
     };
-    const prompt = parseImagePrompt(body.prompt);
+    prompt = parseImagePrompt(body.prompt);
     const promptErr = nonEmptyPromptOrError(prompt);
     if (promptErr) return c.json(openAiError(promptErr.message, promptErr.code), 400);
 
@@ -1505,6 +1546,11 @@ openAiRoutes.post("/images/generations", async (c) => {
             status,
             key_name: keyName,
             token_suffix: getTokenSuffix(experimentalToken.token),
+            total_tokens: 0,
+            input_tokens: 0,
+            output_tokens: 0,
+            reasoning_tokens: 0,
+            cached_tokens: 0,
             error: status === 200 ? "" : "stream_error",
           });
         },
@@ -1586,6 +1632,11 @@ openAiRoutes.post("/images/generations", async (c) => {
             status,
             key_name: keyName,
             token_suffix: getTokenSuffix(chosen.token),
+            total_tokens: 0,
+            input_tokens: 0,
+            output_tokens: 0,
+            reasoning_tokens: 0,
+            cached_tokens: 0,
             error: status === 200 ? "" : "stream_error",
           });
         },
@@ -1682,7 +1733,7 @@ openAiRoutes.post("/images/generations", async (c) => {
       keyName,
       status: 400,
       error: message,
-      prompt: imageCallPrompt("generation", prompt),
+      prompt: imageCallPrompt("generation", prompt || ""),
     });
     return c.json(openAiError(message, "content_policy_violation"), 400);
   }
@@ -1694,7 +1745,7 @@ openAiRoutes.post("/images/generations", async (c) => {
       keyName,
      status: 500,
       error: message,
-      prompt: imageCallPrompt("generation", prompt),
+      prompt: imageCallPrompt("generation", prompt || ""),
     });
     return c.json(openAiError(message || "Internal error", "internal_error"), 500);
   }
@@ -1707,10 +1758,11 @@ openAiRoutes.post("/images/edits", async (c) => {
   const origin = new URL(c.req.url).origin;
   const maxImageBytes = 50 * 1024 * 1024;
 
+  let prompt = "";
   let requestedModel = IMAGE_EDIT_MODEL_ID;
   try {
     const form = await c.req.formData();
-    const prompt = parseImagePrompt(form.get("prompt"));
+    prompt = parseImagePrompt(form.get("prompt"));
     const promptErr = nonEmptyPromptOrError(prompt);
     if (promptErr) return c.json(openAiError(promptErr.message, promptErr.code), 400);
 
@@ -1825,6 +1877,7 @@ openAiRoutes.post("/images/edits", async (c) => {
         cookie,
         settings: settingsBundle.grok,
         n,
+        prompt: imageCallPrompt("edit", prompt),
         onFinish: async ({ status, duration }) => {
           await addRequestLog(c.env.DB, {
             ip,
@@ -1833,6 +1886,11 @@ openAiRoutes.post("/images/edits", async (c) => {
             status,
             key_name: keyName,
             token_suffix: getTokenSuffix(chosen.token),
+            total_tokens: 0,
+            input_tokens: 0,
+            output_tokens: 0,
+            reasoning_tokens: 0,
+            cached_tokens: 0,
             error: status === 200 ? "" : "stream_error",
           });
         },
@@ -1896,6 +1954,11 @@ openAiRoutes.post("/images/edits", async (c) => {
             status,
             key_name: keyName,
             token_suffix: getTokenSuffix(chosen.token),
+            total_tokens: 0,
+            input_tokens: 0,
+            output_tokens: 0,
+            reasoning_tokens: 0,
+            cached_tokens: 0,
             error: status === 200 ? "" : "stream_error",
           });
         },
@@ -1979,7 +2042,7 @@ openAiRoutes.post("/images/edits", async (c) => {
       keyName,
       status: 400,
       error: message,
-      prompt: imageCallPrompt("edit", prompt),
+      prompt: imageCallPrompt("edit", prompt || ""),
     });
     return c.json(openAiError(message, "content_policy_violation"), 400);
   }
@@ -1991,7 +2054,7 @@ openAiRoutes.post("/images/edits", async (c) => {
       keyName,
       status: 500,
       error: message,
-      prompt: imageCallPrompt("edit", prompt),
+      prompt: imageCallPrompt("edit", prompt || ""),
     });
     return c.json(openAiError(message || "Internal error", "internal_error"), 500);
   }

From 0f4e63d5fc6646ed439729cdd1812524c363cd05 Mon Sep 17 00:00:00 2001
From: ZIC143 <2022220052@email.szu.edu.cn>
Date: Sat, 7 Feb 2026 22:04:04 +0800
Subject: fix(logs): correct SQL insert statement parameter count in
 addRequestLog function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/repo/logs.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/repo/logs.ts b/src/repo/logs.ts
index bf3832f..1db5b64 100644
--- a/src/repo/logs.ts
+++ b/src/repo/logs.ts
@@ -34,7 +34,7 @@ export async function addRequestLog(
   const time = formatUtcMs(ts);
   await dbRun(
     db,
-    "INSERT INTO request_logs(id,time,timestamp,ip,model,duration,status,key_name,token_suffix,total_tokens,input_tokens,output_tokens,reasoning_tokens,cached_tokens,error) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
+    "INSERT INTO request_logs(id,time,timestamp,ip,model,duration,status,key_name,token_suffix,total_tokens,input_tokens,output_tokens,reasoning_tokens,cached_tokens,error) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
     [
       id,
       time,