From a9fb610260cbc753b47e82b7d833cf5a29891209 Mon Sep 17 00:00:00 2001
From: Tethys Plex <180962954+TethysPlex@users.noreply.github.com>
Date: Thu, 22 Jan 2026 02:22:12 +1100
Subject: [PATCH 1/2] wip
---
middleware/llm_endpoint_guard.go | 74 +++++
middleware/llm_endpoint_guard_test.go | 51 ++++
router/relay-router.go | 5 +
.../operation_setting/llm_endpoint_setting.go | 45 +++
.../components/settings/OperationSetting.jsx | 18 ++
.../Operation/SettingsLLMEndpoints.jsx | 265 ++++++++++++++++++
6 files changed, 458 insertions(+)
create mode 100644 middleware/llm_endpoint_guard.go
create mode 100644 middleware/llm_endpoint_guard_test.go
create mode 100644 setting/operation_setting/llm_endpoint_setting.go
create mode 100644 web/src/pages/Setting/Operation/SettingsLLMEndpoints.jsx
diff --git a/middleware/llm_endpoint_guard.go b/middleware/llm_endpoint_guard.go
new file mode 100644
index 000000000..f1907da6c
--- /dev/null
+++ b/middleware/llm_endpoint_guard.go
@@ -0,0 +1,74 @@
+package middleware
+
+import (
+ "fmt"
+ "net/http"
+
+ "github.com/QuantumNous/new-api/setting/operation_setting"
+ "github.com/QuantumNous/new-api/types"
+ "github.com/gin-gonic/gin"
+)
+
+// LLMEndpointGuard blocks globally disabled LLM-related endpoints.
+//
+// It returns 404 to make disabled endpoints behave like they do not exist.
+func LLMEndpointGuard() gin.HandlerFunc {
+ return func(c *gin.Context) {
+ enabled, ok := isCurrentLLMEndpointEnabled(c)
+ if ok && !enabled {
+ abortAsRelayNotFound(c)
+ return
+ }
+ c.Next()
+ }
+}
+
+func isCurrentLLMEndpointEnabled(c *gin.Context) (enabled bool, ok bool) {
+ fullPath := c.FullPath()
+ if fullPath == "" {
+ return true, false
+ }
+
+ settings := operation_setting.GetLLMEndpointSetting()
+
+ switch fullPath {
+ case "/v1/completions":
+ return settings.EnableCompletions, true
+ case "/v1/chat/completions", "/pg/chat/completions":
+ return settings.EnableChatCompletions, true
+ case "/v1/responses":
+ return settings.EnableResponses, true
+ case "/v1/messages":
+ return settings.EnableClaudeMessages, true
+ case "/v1/embeddings":
+ return settings.EnableEmbeddings, true
+ case "/v1/edits", "/v1/images/generations", "/v1/images/edits", "/v1/images/variations":
+ return settings.EnableImages, true
+ case "/v1/audio/transcriptions", "/v1/audio/translations", "/v1/audio/speech":
+ return settings.EnableAudio, true
+ case "/v1/moderations":
+ return settings.EnableModerations, true
+ case "/v1/rerank":
+ return settings.EnableRerank, true
+ case "/v1/realtime":
+ return settings.EnableRealtime, true
+ case "/v1/engines/:model/embeddings", "/v1/models/*path", "/v1beta/models", "/v1beta/openai/models", "/v1beta/models/*path":
+ return settings.EnableGemini, true
+ default:
+ return true, false
+ }
+}
+
+func abortAsRelayNotFound(c *gin.Context) {
+ err := types.OpenAIError{
+ Message: fmt.Sprintf("Invalid URL (%s %s)", c.Request.Method, c.Request.URL.Path),
+ Type: "invalid_request_error",
+ Param: "",
+ Code: "",
+ }
+ c.JSON(http.StatusNotFound, gin.H{
+ "error": err,
+ })
+ c.Abort()
+}
+
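Note: the guard is registered ahead of authentication in the router changes below, so a disabled endpoint answers 404 before credentials are ever checked and does not reveal whether a key is valid. A minimal wiring sketch (handler name is illustrative):

    v1 := router.Group("/v1")
    v1.Use(middleware.LLMEndpointGuard()) // first: disabled endpoints return 404
    v1.Use(middleware.TokenAuth())        // auth runs only for enabled endpoints
    v1.POST("/chat/completions", controller.Relay)
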
diff --git a/middleware/llm_endpoint_guard_test.go b/middleware/llm_endpoint_guard_test.go
new file mode 100644
index 000000000..87eda6dcf
--- /dev/null
+++ b/middleware/llm_endpoint_guard_test.go
@@ -0,0 +1,51 @@
+package middleware
+
+import (
+ "net/http"
+ "net/http/httptest"
+ "testing"
+
+ "github.com/gin-gonic/gin"
+)
+
+func TestLLMEndpointGuard_Defaults(t *testing.T) {
+ gin.SetMode(gin.TestMode)
+
+ router := gin.New()
+ authMiddleware := func(c *gin.Context) {
+ c.Status(http.StatusUnauthorized)
+ c.Abort()
+ }
+
+ v1 := router.Group("/v1")
+ v1.Use(LLMEndpointGuard())
+ v1.Use(authMiddleware)
+ v1.POST("/completions", func(c *gin.Context) { c.Status(http.StatusOK) })
+ v1.POST("/chat/completions", func(c *gin.Context) { c.Status(http.StatusOK) })
+ v1.POST("/embeddings", func(c *gin.Context) { c.Status(http.StatusOK) })
+ v1.GET("/realtime", func(c *gin.Context) { c.Status(http.StatusOK) })
+
+ v1betaModels := router.Group("/v1beta/models")
+ v1betaModels.Use(LLMEndpointGuard())
+ v1betaModels.Use(authMiddleware)
+ v1betaModels.GET("", func(c *gin.Context) { c.Status(http.StatusOK) })
+
+ assertStatus := func(method, path string, want int) {
+ request := httptest.NewRequest(method, path, nil)
+ response := httptest.NewRecorder()
+ router.ServeHTTP(response, request)
+ if response.Code != want {
+ t.Fatalf("%s %s: expected %d, got %d", method, path, want, response.Code)
+ }
+ }
+
+ // Defaults: allow completions and chat completions.
+ assertStatus(http.MethodPost, "/v1/completions", http.StatusUnauthorized)
+ assertStatus(http.MethodPost, "/v1/chat/completions", http.StatusUnauthorized)
+
+ // Defaults: disable everything else.
+ assertStatus(http.MethodPost, "/v1/embeddings", http.StatusNotFound)
+ assertStatus(http.MethodGet, "/v1/realtime", http.StatusNotFound)
+ assertStatus(http.MethodGet, "/v1beta/models", http.StatusNotFound)
+}
+
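Since GetLLMEndpointSetting returns a pointer to the package-level value, a test can flip a toggle in-process before asserting. A sketch (assumes no parallel test mutates the same global):

    s := operation_setting.GetLLMEndpointSetting()
    s.EnableEmbeddings = true
    assertStatus(http.MethodPost, "/v1/embeddings", http.StatusUnauthorized) // now reaches auth
    s.EnableEmbeddings = false
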
diff --git a/router/relay-router.go b/router/relay-router.go
index b5ac16c64..23c47d583 100644
--- a/router/relay-router.go
+++ b/router/relay-router.go
@@ -43,6 +43,7 @@ func SetRelayRouter(router *gin.Engine) {
}
geminiRouter := router.Group("/v1beta/models")
+ geminiRouter.Use(middleware.LLMEndpointGuard())
geminiRouter.Use(middleware.TokenAuth())
{
geminiRouter.GET("", func(c *gin.Context) {
@@ -51,6 +52,7 @@ func SetRelayRouter(router *gin.Engine) {
}
geminiCompatibleRouter := router.Group("/v1beta/openai/models")
+ geminiCompatibleRouter.Use(middleware.LLMEndpointGuard())
geminiCompatibleRouter.Use(middleware.TokenAuth())
{
geminiCompatibleRouter.GET("", func(c *gin.Context) {
@@ -59,11 +61,13 @@ func SetRelayRouter(router *gin.Engine) {
}
playgroundRouter := router.Group("/pg")
+ playgroundRouter.Use(middleware.LLMEndpointGuard())
playgroundRouter.Use(middleware.UserAuth(), middleware.Distribute())
{
playgroundRouter.POST("/chat/completions", controller.Playground)
}
relayV1Router := router.Group("/v1")
+ relayV1Router.Use(middleware.LLMEndpointGuard())
relayV1Router.Use(middleware.TokenAuth())
relayV1Router.Use(middleware.ModelRequestRateLimit())
{
@@ -173,6 +177,7 @@ func SetRelayRouter(router *gin.Engine) {
}
relayGeminiRouter := router.Group("/v1beta")
+ relayGeminiRouter.Use(middleware.LLMEndpointGuard())
relayGeminiRouter.Use(middleware.TokenAuth())
relayGeminiRouter.Use(middleware.ModelRequestRateLimit())
relayGeminiRouter.Use(middleware.Distribute())
diff --git a/setting/operation_setting/llm_endpoint_setting.go b/setting/operation_setting/llm_endpoint_setting.go
new file mode 100644
index 000000000..8152d39fa
--- /dev/null
+++ b/setting/operation_setting/llm_endpoint_setting.go
@@ -0,0 +1,45 @@
+package operation_setting
+
+import "github.com/QuantumNous/new-api/setting/config"
+
+// LLMEndpointSetting controls which LLM-related API endpoints are enabled globally.
+//
+// Disabled endpoints should behave like the endpoint does not exist (HTTP 404).
+//
+// Default: enable completions and chat completions, disable everything else.
+type LLMEndpointSetting struct {
+ EnableCompletions bool `json:"enable_completions"`
+ EnableChatCompletions bool `json:"enable_chat_completions"`
+ EnableResponses bool `json:"enable_responses"`
+ EnableClaudeMessages bool `json:"enable_claude_messages"`
+ EnableEmbeddings bool `json:"enable_embeddings"`
+ EnableImages bool `json:"enable_images"`
+ EnableAudio bool `json:"enable_audio"`
+ EnableModerations bool `json:"enable_moderations"`
+ EnableRerank bool `json:"enable_rerank"`
+ EnableRealtime bool `json:"enable_realtime"`
+ EnableGemini bool `json:"enable_gemini"`
+}
+
+var llmEndpointSetting = LLMEndpointSetting{
+ EnableCompletions: true,
+ EnableChatCompletions: true,
+ EnableResponses: false,
+ EnableClaudeMessages: false,
+ EnableEmbeddings: false,
+ EnableImages: false,
+ EnableAudio: false,
+ EnableModerations: false,
+ EnableRerank: false,
+ EnableRealtime: false,
+ EnableGemini: false,
+}
+
+func init() {
+ config.GlobalConfig.Register("llm_endpoint_setting", &llmEndpointSetting)
+}
+
+func GetLLMEndpointSetting() *LLMEndpointSetting {
+ return &llmEndpointSetting
+}
+
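Server-side callers read the toggles through the same accessor the middleware uses. A hypothetical capability handler (not part of this patch) could report which relay routes are live:

    func capsHandler(c *gin.Context) {
        s := operation_setting.GetLLMEndpointSetting()
        c.JSON(http.StatusOK, gin.H{
            "chat_completions": s.EnableChatCompletions,
            "embeddings":       s.EnableEmbeddings,
            "realtime":         s.EnableRealtime,
        })
    }
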
diff --git a/web/src/components/settings/OperationSetting.jsx b/web/src/components/settings/OperationSetting.jsx
index 4a77bcf10..16d4ffb17 100644
--- a/web/src/components/settings/OperationSetting.jsx
+++ b/web/src/components/settings/OperationSetting.jsx
@@ -27,6 +27,7 @@ import SettingsLog from '../../pages/Setting/Operation/SettingsLog';
import SettingsMonitoring from '../../pages/Setting/Operation/SettingsMonitoring';
import SettingsCreditLimit from '../../pages/Setting/Operation/SettingsCreditLimit';
import SettingsCheckin from '../../pages/Setting/Operation/SettingsCheckin';
+import SettingsLLMEndpoints from '../../pages/Setting/Operation/SettingsLLMEndpoints';
import { API, showError, toBoolean } from '../../helpers';
const OperationSetting = () => {
@@ -76,6 +77,19 @@ const OperationSetting = () => {
'checkin_setting.enabled': false,
'checkin_setting.min_quota': 1000,
'checkin_setting.max_quota': 10000,
+
+ /* LLM endpoint toggles */
+ 'llm_endpoint_setting.enable_completions': true,
+ 'llm_endpoint_setting.enable_chat_completions': true,
+ 'llm_endpoint_setting.enable_responses': false,
+ 'llm_endpoint_setting.enable_claude_messages': false,
+ 'llm_endpoint_setting.enable_embeddings': false,
+ 'llm_endpoint_setting.enable_images': false,
+ 'llm_endpoint_setting.enable_audio': false,
+ 'llm_endpoint_setting.enable_moderations': false,
+ 'llm_endpoint_setting.enable_rerank': false,
+ 'llm_endpoint_setting.enable_realtime': false,
+ 'llm_endpoint_setting.enable_gemini': false,
});
let [loading, setLoading] = useState(false);
@@ -121,6 +135,10 @@ const OperationSetting = () => {
+ {/* LLM endpoint toggles */}
+ <SettingsLLMEndpoints options={inputs} refresh={onRefresh} />
+
{/* 顶栏模块管理 */}
diff --git a/web/src/pages/Setting/Operation/SettingsLLMEndpoints.jsx b/web/src/pages/Setting/Operation/SettingsLLMEndpoints.jsx
new file mode 100644
index 000000000..c112402a2
--- /dev/null
+++ b/web/src/pages/Setting/Operation/SettingsLLMEndpoints.jsx
@@ -0,0 +1,265 @@
+/*
+Copyright (C) 2025 QuantumNous
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+For commercial licensing, please contact support@quantumnous.com
+*/
+
+import React, { useEffect, useRef, useState } from 'react';
+import { Banner, Button, Col, Form, Row, Spin } from '@douyinfe/semi-ui';
+import { useTranslation } from 'react-i18next';
+import {
+ API,
+ compareObjects,
+ showError,
+ showSuccess,
+ showWarning,
+} from '../../../helpers';
+
+const defaultLLMEndpointInputs = {
+ 'llm_endpoint_setting.enable_completions': true,
+ 'llm_endpoint_setting.enable_chat_completions': true,
+ 'llm_endpoint_setting.enable_responses': false,
+ 'llm_endpoint_setting.enable_claude_messages': false,
+ 'llm_endpoint_setting.enable_embeddings': false,
+ 'llm_endpoint_setting.enable_images': false,
+ 'llm_endpoint_setting.enable_audio': false,
+ 'llm_endpoint_setting.enable_moderations': false,
+ 'llm_endpoint_setting.enable_rerank': false,
+ 'llm_endpoint_setting.enable_realtime': false,
+ 'llm_endpoint_setting.enable_gemini': false,
+};
+
+export default function SettingsLLMEndpoints(props) {
+ const { t } = useTranslation();
+ const [loading, setLoading] = useState(false);
+ const [inputs, setInputs] = useState(defaultLLMEndpointInputs);
+ const [inputsRow, setInputsRow] = useState(inputs);
+ const refForm = useRef();
+
+ function handleFieldChange(fieldName) {
+ return (value) => {
+ setInputs((prev) => ({ ...prev, [fieldName]: value }));
+ };
+ }
+
+ function onSubmit() {
+ const updateArray = compareObjects(inputs, inputsRow);
+ if (!updateArray.length) return showWarning(t('你似乎并没有修改什么'));
+
+ const requestQueue = updateArray.map((item) =>
+ API.put('/api/option/', {
+ key: item.key,
+ value: String(inputs[item.key]),
+ }),
+ );
+
+ setLoading(true);
+ Promise.all(requestQueue)
+ .then((res) => {
+ if (requestQueue.length === 1) {
+ if (res.includes(undefined)) return;
+ } else if (requestQueue.length > 1) {
+ if (res.includes(undefined))
+ return showError(t('部分保存失败,请重试'));
+ }
+ showSuccess(t('保存成功'));
+ props.refresh();
+ })
+ .catch(() => {
+ showError(t('保存失败,请重试'));
+ })
+ .finally(() => {
+ setLoading(false);
+ });
+ }
+
+ useEffect(() => {
+ const currentInputs = {};
+ for (const key of Object.keys(defaultLLMEndpointInputs)) {
+ if (props.options[key] !== undefined) {
+ currentInputs[key] = props.options[key];
+ }
+ }
+
+ const mergedInputs = { ...defaultLLMEndpointInputs, ...currentInputs };
+ setInputs(mergedInputs);
+ setInputsRow(structuredClone(mergedInputs));
+ refForm.current?.setValues(mergedInputs);
+ }, [props.options]);
+
+ return (
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ );
+}
From 176aceadeeeb3508dadf998f70c6a06c782d2ef4 Mon Sep 17 00:00:00 2001
From: Tethys Plex <180962954+TethysPlex@users.noreply.github.com>
Date: Thu, 22 Jan 2026 04:52:39 +1100
Subject: [PATCH 2/2] wip
---
controller/relay.go | 17 +
controller/upstream_error_mask.go | 45 +++
dto/channel_settings.go | 15 +-
relay/channel/ali/image.go | 2 +-
relay/channel/ali/rerank.go | 2 +-
relay/channel/claude/relay-claude.go | 4 +-
relay/channel/jimeng/image.go | 2 +-
relay/channel/openai/adaptor.go | 36 +-
relay/channel/openai/chat_via_responses.go | 4 +-
relay/channel/openai/completions_via_chat.go | 68 ++++
.../openai/completions_via_chat_response.go | 351 ++++++++++++++++++
relay/channel/openai/relay-openai.go | 2 +-
relay/channel/openai/relay_responses.go | 2 +-
relay/channel/palm/relay-palm.go | 2 +-
relay/channel/tencent/relay-tencent.go | 2 +-
relay/channel/zhipu/relay-zhipu.go | 2 +-
relay/channel/zhipu_4v/image.go | 2 +-
service/error.go | 6 +-
types/error.go | 28 ++
web-v2/src/pages/console/TokenListPage.tsx | 156 +-------
web-v2/src/pages/console/UsageLogsPage.tsx | 97 +----
.../channels/modals/EditChannelModal.jsx | 29 ++
22 files changed, 619 insertions(+), 255 deletions(-)
create mode 100644 controller/upstream_error_mask.go
create mode 100644 relay/channel/openai/completions_via_chat.go
create mode 100644 relay/channel/openai/completions_via_chat_response.go
diff --git a/controller/relay.go b/controller/relay.go
index e033847df..6ead639b1 100644
--- a/controller/relay.go
+++ b/controller/relay.go
@@ -86,6 +86,23 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
defer func() {
if newAPIError != nil {
logger.LogError(c, fmt.Sprintf("relay error: %s", newAPIError.Error()))
+ if shouldReturnOverloadedForUpstreamError(newAPIError) {
+ switch relayFormat {
+ case types.RelayFormatOpenAIRealtime:
+ helper.WssError(c, ws, overloadedOpenAIError())
+ case types.RelayFormatClaude:
+ c.JSON(http.StatusServiceUnavailable, gin.H{
+ "type": "error",
+ "error": overloadedClaudeError(),
+ })
+ default:
+ c.JSON(http.StatusServiceUnavailable, gin.H{
+ "error": overloadedOpenAIError(),
+ })
+ }
+ return
+ }
+
newAPIError.SetMessage(common.MessageWithRequestId(newAPIError.Error(), requestId))
switch relayFormat {
case types.RelayFormatOpenAIRealtime:
diff --git a/controller/upstream_error_mask.go b/controller/upstream_error_mask.go
new file mode 100644
index 000000000..d47019517
--- /dev/null
+++ b/controller/upstream_error_mask.go
@@ -0,0 +1,45 @@
+package controller
+
+import (
+ "net/http"
+
+ "github.com/QuantumNous/new-api/types"
+)
+
+const overloadedMessage = "Overloaded"
+
+func shouldMaskUpstreamStatusCode(statusCode int) bool {
+ if statusCode >= 500 && statusCode <= 599 {
+ return true
+ }
+ if statusCode >= 400 && statusCode <= 499 {
+ return statusCode != http.StatusBadRequest && statusCode != http.StatusRequestEntityTooLarge
+ }
+ return false
+}
+
+func shouldReturnOverloadedForUpstreamError(err *types.NewAPIError) bool {
+ if err == nil {
+ return false
+ }
+ if !err.IsUpstreamError() {
+ return false
+ }
+ return shouldMaskUpstreamStatusCode(err.UpstreamStatusCode())
+}
+
+func overloadedOpenAIError() types.OpenAIError {
+ return types.OpenAIError{
+ Message: overloadedMessage,
+ Type: "upstream_error",
+ Param: "",
+ Code: nil,
+ }
+}
+
+func overloadedClaudeError() types.ClaudeError {
+ return types.ClaudeError{
+ Type: "upstream_error",
+ Message: overloadedMessage,
+ }
+}
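The rule masks every upstream 5xx and every 4xx except 400 and 413 behind a generic 503 "Overloaded". A table-driven sketch of the expected decisions (test name and placement are illustrative):

    func TestShouldMaskUpstreamStatusCode(t *testing.T) {
        cases := map[int]bool{
            http.StatusBadRequest:            false, // 400 is passed through
            http.StatusUnauthorized:          true,  // upstream auth problems are hidden
            http.StatusTooManyRequests:       true,  // upstream rate limits are hidden
            http.StatusRequestEntityTooLarge: false, // 413 is passed through
            http.StatusBadGateway:            true,  // every 5xx is hidden
        }
        for code, want := range cases {
            if got := shouldMaskUpstreamStatusCode(code); got != want {
                t.Errorf("status %d: want %v, got %v", code, want, got)
            }
        }
    }
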
diff --git a/dto/channel_settings.go b/dto/channel_settings.go
index e88f2235e..d3e00bd40 100644
--- a/dto/channel_settings.go
+++ b/dto/channel_settings.go
@@ -1,12 +1,15 @@
package dto
type ChannelSettings struct {
- ForceFormat bool `json:"force_format,omitempty"`
- ThinkingToContent bool `json:"thinking_to_content,omitempty"`
- Proxy string `json:"proxy"`
- PassThroughBodyEnabled bool `json:"pass_through_body_enabled,omitempty"`
- SystemPrompt string `json:"system_prompt,omitempty"`
- SystemPromptOverride bool `json:"system_prompt_override,omitempty"`
+ ForceFormat bool `json:"force_format,omitempty"`
+ // If enabled, /v1/completions requests will be proxied via /v1/chat/completions
+ // and responses will be converted back to the legacy Completions-compatible format.
+ CompletionsViaChatCompletions bool `json:"completions_via_chat_completions,omitempty"`
+ ThinkingToContent bool `json:"thinking_to_content,omitempty"`
+ Proxy string `json:"proxy"`
+ PassThroughBodyEnabled bool `json:"pass_through_body_enabled,omitempty"`
+ SystemPrompt string `json:"system_prompt,omitempty"`
+ SystemPromptOverride bool `json:"system_prompt_override,omitempty"`
}
type VertexKeyType string
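For reference, a channel settings blob that opts into the new behavior carries the flag alongside the existing ones; a minimal decode sketch (values illustrative):

    var s dto.ChannelSettings
    _ = json.Unmarshal([]byte(`{"completions_via_chat_completions": true}`), &s)
    // s.CompletionsViaChatCompletions == true
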
diff --git a/relay/channel/ali/image.go b/relay/channel/ali/image.go
index cfd9a0fdd..4a74192c9 100644
--- a/relay/channel/ali/image.go
+++ b/relay/channel/ali/image.go
@@ -315,7 +315,7 @@ func aliImageHandler(a *Adaptor, c *gin.Context, resp *http.Response, info *rela
Type: "ali_error",
Param: "",
Code: aliResponse.Output.Code,
- }, resp.StatusCode), nil
+ }, resp.StatusCode, types.ErrOptionWithUpstreamError()), nil
}
}
diff --git a/relay/channel/ali/rerank.go b/relay/channel/ali/rerank.go
index 1323fc830..2cd98211a 100644
--- a/relay/channel/ali/rerank.go
+++ b/relay/channel/ali/rerank.go
@@ -51,7 +51,7 @@ func RerankHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayI
Type: aliResponse.Code,
Param: aliResponse.RequestId,
Code: aliResponse.Code,
- }, resp.StatusCode), nil
+ }, resp.StatusCode, types.ErrOptionWithUpstreamError()), nil
}
usage := dto.Usage{
diff --git a/relay/channel/claude/relay-claude.go b/relay/channel/claude/relay-claude.go
index fe37a26e1..fa6dcd34c 100644
--- a/relay/channel/claude/relay-claude.go
+++ b/relay/channel/claude/relay-claude.go
@@ -642,7 +642,7 @@ func HandleStreamResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
return types.NewError(err, types.ErrorCodeBadResponseBody)
}
if claudeError := claudeResponse.GetClaudeError(); claudeError != nil && claudeError.Type != "" {
- return types.WithClaudeError(*claudeError, http.StatusInternalServerError)
+ return types.WithClaudeError(*claudeError, http.StatusInternalServerError, types.ErrOptionWithUpstreamError())
}
if info.RelayFormat == types.RelayFormatClaude {
FormatClaudeResponseInfo(requestMode, &claudeResponse, nil, claudeInfo)
@@ -739,7 +739,7 @@ func HandleClaudeResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
return types.NewError(err, types.ErrorCodeBadResponseBody)
}
if claudeError := claudeResponse.GetClaudeError(); claudeError != nil && claudeError.Type != "" {
- return types.WithClaudeError(*claudeError, http.StatusInternalServerError)
+ return types.WithClaudeError(*claudeError, http.StatusInternalServerError, types.ErrOptionWithUpstreamError())
}
if requestMode == RequestModeCompletion {
claudeInfo.Usage = service.ResponseText2Usage(c, claudeResponse.Completion, info.UpstreamModelName, info.GetEstimatePromptTokens())
diff --git a/relay/channel/jimeng/image.go b/relay/channel/jimeng/image.go
index e422e061d..0f16bde13 100644
--- a/relay/channel/jimeng/image.go
+++ b/relay/channel/jimeng/image.go
@@ -69,7 +69,7 @@ func jimengImageHandler(c *gin.Context, resp *http.Response, info *relaycommon.R
Type: "jimeng_error",
Param: "",
Code: fmt.Sprintf("%d", jimengResponse.Code),
- }, resp.StatusCode)
+ }, resp.StatusCode, types.ErrOptionWithUpstreamError())
}
// Convert Jimeng response to OpenAI format
diff --git a/relay/channel/openai/adaptor.go b/relay/channel/openai/adaptor.go
index 52f5d731b..9e7bba42f 100644
--- a/relay/channel/openai/adaptor.go
+++ b/relay/channel/openai/adaptor.go
@@ -171,6 +171,13 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
url = strings.Replace(url, "{model}", info.UpstreamModelName, -1)
return url, nil
default:
+ if info.ChannelType == constant.ChannelTypeOpenAI &&
+ info.RelayMode == relayconstant.RelayModeCompletions &&
+ info.ChannelSetting.CompletionsViaChatCompletions &&
+ !model_setting.GetGlobalSettings().PassThroughRequestEnabled &&
+ !info.ChannelSetting.PassThroughBodyEnabled {
+ return relaycommon.GetFullRequestURL(info.ChannelBaseUrl, "/v1/chat/completions", info.ChannelType), nil
+ }
if info.RelayFormat == types.RelayFormatClaude || info.RelayFormat == types.RelayFormatGemini {
return fmt.Sprintf("%s/v1/chat/completions", info.ChannelBaseUrl), nil
}
@@ -217,6 +224,21 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
if request == nil {
return nil, errors.New("request is nil")
}
+ if info.ChannelType == constant.ChannelTypeOpenAI &&
+ info.RelayMode == relayconstant.RelayModeCompletions &&
+ info.ChannelSetting.CompletionsViaChatCompletions {
+ promptText := completionsPromptToString(request.Prompt)
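+ // The legacy prompt is forwarded as a single assistant-role message (continuation/prefill style).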
+ request.Messages = []dto.Message{
+ {
+ Role: "assistant",
+ Content: promptText,
+ },
+ }
+ // Remove legacy fields to keep the upstream payload chat-completions compatible.
+ request.Prompt = nil
+ request.Prefix = nil
+ request.Suffix = nil
+ }
if info.ChannelType != constant.ChannelTypeOpenAI && info.ChannelType != constant.ChannelTypeAzure {
request.StreamOptions = nil
}
@@ -606,10 +628,18 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
usage, err = OaiResponsesHandler(c, info, resp)
}
default:
- if info.IsStream {
- usage, err = OaiStreamHandler(c, info, resp)
+ if shouldUseChatCompletionsForCompletions(info) {
+ if info.IsStream {
+ usage, err = OaiCompletionsViaChatStreamHandler(c, info, resp)
+ } else {
+ usage, err = OpenaiCompletionsViaChatHandler(c, info, resp)
+ }
} else {
- usage, err = OpenaiHandler(c, info, resp)
+ if info.IsStream {
+ usage, err = OaiStreamHandler(c, info, resp)
+ } else {
+ usage, err = OpenaiHandler(c, info, resp)
+ }
}
}
return
diff --git a/relay/channel/openai/chat_via_responses.go b/relay/channel/openai/chat_via_responses.go
index 1a2d1883e..9c916601b 100644
--- a/relay/channel/openai/chat_via_responses.go
+++ b/relay/channel/openai/chat_via_responses.go
@@ -36,7 +36,7 @@ func OaiResponsesToChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp
}
if oaiError := responsesResp.GetOpenAIError(); oaiError != nil && oaiError.Type != "" {
- return nil, types.WithOpenAIError(*oaiError, resp.StatusCode)
+ return nil, types.WithOpenAIError(*oaiError, resp.StatusCode, types.ErrOptionWithUpstreamError())
}
responsesResp.Model = relaycommon.MaskMappedModelName(c, info, responsesResp.Model)
@@ -327,7 +327,7 @@ func OaiResponsesToChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo
case "response.error", "response.failed":
if streamResp.Response != nil {
if oaiErr := streamResp.Response.GetOpenAIError(); oaiErr != nil && oaiErr.Type != "" {
- streamErr = types.WithOpenAIError(*oaiErr, http.StatusInternalServerError)
+ streamErr = types.WithOpenAIError(*oaiErr, http.StatusInternalServerError, types.ErrOptionWithUpstreamError())
return false
}
}
diff --git a/relay/channel/openai/completions_via_chat.go b/relay/channel/openai/completions_via_chat.go
new file mode 100644
index 000000000..6b529c63f
--- /dev/null
+++ b/relay/channel/openai/completions_via_chat.go
@@ -0,0 +1,68 @@
+package openai
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/QuantumNous/new-api/common"
+ "github.com/QuantumNous/new-api/constant"
+ relaycommon "github.com/QuantumNous/new-api/relay/common"
+ relayconstant "github.com/QuantumNous/new-api/relay/constant"
+ "github.com/QuantumNous/new-api/setting/model_setting"
+)
+
+func shouldUseChatCompletionsForCompletions(info *relaycommon.RelayInfo) bool {
+ if info == nil {
+ return false
+ }
+ if info.ChannelType != constant.ChannelTypeOpenAI {
+ return false
+ }
+ if info.RelayMode != relayconstant.RelayModeCompletions {
+ return false
+ }
+ if !info.ChannelSetting.CompletionsViaChatCompletions {
+ return false
+ }
+ // This feature needs request-body conversion, so it cannot be used when passthrough is enabled.
+ if model_setting.GetGlobalSettings().PassThroughRequestEnabled || info.ChannelSetting.PassThroughBodyEnabled {
+ return false
+ }
+ return true
+}
+
+func completionsCompatibleID(upstreamID string) string {
+ if upstreamID == "" {
+ return ""
+ }
+ if strings.HasPrefix(upstreamID, "chatcmpl-") {
+ return "cmpl-" + strings.TrimPrefix(upstreamID, "chatcmpl-")
+ }
+ return upstreamID
+}
+
+func completionsPromptToString(prompt any) string {
+ if prompt == nil {
+ return ""
+ }
+
+ switch v := prompt.(type) {
+ case string:
+ return v
+ case []any:
+ parts := make([]string, 0, len(v))
+ for _, item := range v {
+ if item == nil {
+ continue
+ }
+ if s, ok := item.(string); ok {
+ parts = append(parts, s)
+ continue
+ }
+ parts = append(parts, common.Interface2String(item))
+ }
+ return strings.Join(parts, "\n")
+ default:
+ return fmt.Sprintf("%v", prompt)
+ }
+}
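Expected behavior of the two helpers, written as a quick test sketch (not part of the patch):

    func TestCompletionsViaChatHelpers(t *testing.T) {
        if got := completionsCompatibleID("chatcmpl-abc"); got != "cmpl-abc" {
            t.Errorf("chat id should be rewritten, got %q", got)
        }
        if got := completionsCompatibleID("resp_123"); got != "resp_123" {
            t.Errorf("non-chat id should pass through, got %q", got)
        }
        if got := completionsPromptToString([]any{"a", "b"}); got != "a\nb" {
            t.Errorf("array prompt should join with newlines, got %q", got)
        }
        if got := completionsPromptToString(nil); got != "" {
            t.Errorf("nil prompt should be empty, got %q", got)
        }
    }
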
diff --git a/relay/channel/openai/completions_via_chat_response.go b/relay/channel/openai/completions_via_chat_response.go
new file mode 100644
index 000000000..0062ec3d6
--- /dev/null
+++ b/relay/channel/openai/completions_via_chat_response.go
@@ -0,0 +1,351 @@
+package openai
+
+import (
+ "encoding/json"
+ "fmt"
+ "io"
+ "net/http"
+ "strconv"
+ "strings"
+
+ "github.com/QuantumNous/new-api/common"
+ "github.com/QuantumNous/new-api/dto"
+ "github.com/QuantumNous/new-api/logger"
+ relaycommon "github.com/QuantumNous/new-api/relay/common"
+ "github.com/QuantumNous/new-api/relay/helper"
+ "github.com/QuantumNous/new-api/service"
+ "github.com/QuantumNous/new-api/types"
+
+ "github.com/gin-gonic/gin"
+)
+
+type openAICompletionsResponse struct {
+ ID string `json:"id"`
+ Object string `json:"object"`
+ Created int64 `json:"created"`
+ Model string `json:"model"`
+ Choices []openAICompletionsChoice `json:"choices"`
+ Usage dto.Usage `json:"usage"`
+ Error any `json:"error,omitempty"`
+ Extra map[string]json.RawMessage `json:"-"`
+}
+
+type openAICompletionsChoice struct {
+ Text string `json:"text"`
+ Index int `json:"index"`
+ Logprobs any `json:"logprobs"`
+ FinishReason string `json:"finish_reason"`
+}
+
+type openAICompletionsStreamChunk struct {
+ ID string `json:"id"`
+ Object string `json:"object"`
+ Created int64 `json:"created"`
+ Model string `json:"model"`
+ Choices []openAICompletionsChunkItem `json:"choices"`
+}
+
+type openAICompletionsChunkItem struct {
+ Text string `json:"text"`
+ Index int `json:"index"`
+ Logprobs any `json:"logprobs"`
+ FinishReason *string `json:"finish_reason"`
+}
+
+func parseCreatedUnix(created any) int64 {
+ switch v := created.(type) {
+ case int64:
+ return v
+ case int:
+ return int64(v)
+ case float64:
+ return int64(v)
+ case json.Number:
+ if i, err := v.Int64(); err == nil {
+ return i
+ }
+ case string:
+ if i, err := strconv.ParseInt(v, 10, 64); err == nil {
+ return i
+ }
+ }
+ return 0
+}
+
+func messageToCompletionText(message dto.Message, thinkingToContent bool) string {
+ content := message.StringContent()
+ if !thinkingToContent {
+ return content
+ }
+
+ reasoning := message.ReasoningContent
+ if reasoning == "" {
+ reasoning = message.Reasoning
+ }
+ if reasoning == "" {
+ return content
+ }
+
+ if content == "" {
+ return "
\n" + reasoning + "\n\n"
+ }
+ return "
\n" + reasoning + "\n\n" + content
+}
+
+func OpenaiCompletionsViaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
+ defer service.CloseResponseBodyGracefully(resp)
+
+ responseBody, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return nil, types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError)
+ }
+ if common.DebugEnabled {
+ println("upstream response body:", string(responseBody))
+ }
+
+ var chatResp dto.OpenAITextResponse
+ if err := common.Unmarshal(responseBody, &chatResp); err != nil {
+ return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
+ }
+
+ if oaiError := chatResp.GetOpenAIError(); oaiError != nil && oaiError.Type != "" {
+ return nil, types.WithOpenAIError(*oaiError, resp.StatusCode, types.ErrOptionWithUpstreamError())
+ }
+
+ usage := chatResp.Usage
+ if usage.PromptTokens == 0 {
+ completionTokens := usage.CompletionTokens
+ if completionTokens == 0 {
+ for _, choice := range chatResp.Choices {
+ text := messageToCompletionText(choice.Message, info.ChannelSetting.ThinkingToContent)
+ completionTokens += service.CountTextToken(text, info.UpstreamModelName)
+ }
+ }
+ usage = dto.Usage{
+ PromptTokens: info.GetEstimatePromptTokens(),
+ CompletionTokens: completionTokens,
+ TotalTokens: info.GetEstimatePromptTokens() + completionTokens,
+ }
+ }
+
+ applyUsagePostProcessing(info, &usage, responseBody)
+
+ completionsResp := openAICompletionsResponse{
+ ID: completionsCompatibleID(chatResp.Id),
+ Object: "text_completion",
+ Created: parseCreatedUnix(chatResp.Created),
+ Model: relaycommon.MaskMappedModelName(c, info, chatResp.Model),
+ Choices: make([]openAICompletionsChoice, 0, len(chatResp.Choices)),
+ Usage: usage,
+ }
+
+ for _, choice := range chatResp.Choices {
+ completionsResp.Choices = append(completionsResp.Choices, openAICompletionsChoice{
+ Text: messageToCompletionText(choice.Message, info.ChannelSetting.ThinkingToContent),
+ Index: choice.Index,
+ Logprobs: nil,
+ FinishReason: choice.FinishReason,
+ })
+ }
+
+ responseBody, err = common.Marshal(completionsResp)
+ if err != nil {
+ return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
+ }
+
+ service.IOCopyBytesGracefully(c, resp, responseBody)
+ return &usage, nil
+}
+
+func OaiCompletionsViaChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
+ if resp == nil || resp.Body == nil {
+ logger.LogError(c, "invalid response or response body")
+ return nil, types.NewOpenAIError(fmt.Errorf("invalid response"), types.ErrorCodeBadResponse, http.StatusInternalServerError)
+ }
+
+ defer service.CloseResponseBodyGracefully(resp)
+
+ var (
+ lastStreamData string
+ containStreamUsage bool
+ usage = &dto.Usage{}
+ responseTextBuilder strings.Builder
+ )
+
+ helper.StreamScannerHandler(c, resp, info, func(data string) bool {
+ if data == "" {
+ return true
+ }
+ lastStreamData = data
+
+ var streamResp dto.ChatCompletionsStreamResponse
+ if err := common.Unmarshal(common.StringToByteSlice(data), &streamResp); err != nil {
+ logger.LogError(c, "failed to unmarshal chat completion chunk: "+err.Error())
+ return false
+ }
+
+ if service.ValidUsage(streamResp.Usage) {
+ containStreamUsage = true
+ usage = streamResp.Usage
+ }
+
+ convertedChunks, chunkText := chatChunkToCompletionsChunks(c, info, streamResp)
+ if chunkText != "" {
+ responseTextBuilder.WriteString(chunkText)
+ }
+ for _, chunk := range convertedChunks {
+ info.SendResponseCount++
+ if err := helper.ObjectData(c, chunk); err != nil {
+ logger.LogError(c, "failed to write completion chunk: "+err.Error())
+ return false
+ }
+ }
+ return true
+ })
+
+ if !containStreamUsage {
+ usage = service.ResponseText2Usage(
+ c,
+ responseTextBuilder.String(),
+ info.UpstreamModelName,
+ info.GetEstimatePromptTokens(),
+ )
+ }
+
+ applyUsagePostProcessing(info, usage, common.StringToByteSlice(lastStreamData))
+ helper.Done(c)
+ return usage, nil
+}
+
+func chatChunkToCompletionsChunks(c *gin.Context, info *relaycommon.RelayInfo, chatChunk dto.ChatCompletionsStreamResponse) ([]openAICompletionsStreamChunk, string) {
+ if info == nil {
+ return nil, ""
+ }
+
+ created := chatChunk.Created
+ model := relaycommon.MaskMappedModelName(c, info, chatChunk.Model)
+ id := completionsCompatibleID(chatChunk.Id)
+
+ object := "text_completion"
+
+ // When thinking_to_content is enabled, emulate the existing tag-injection behavior
+ // from sendStreamData, but output as legacy completions chunks.
+ hasThinkingContent := false
+ hasContent := false
+ var thinkingContent strings.Builder
+ for _, choice := range chatChunk.Choices {
+ if rc := choice.Delta.GetReasoningContent(); len(rc) > 0 {
+ hasThinkingContent = true
+ thinkingContent.WriteString(rc)
+ }
+ if cc := choice.Delta.GetContentString(); len(cc) > 0 {
+ hasContent = true
+ }
+ }
+
+ chunks := make([]openAICompletionsStreamChunk, 0, 2)
+ var sentText strings.Builder
+
+ if info.ChannelSetting.ThinkingToContent && info.ThinkingContentInfo.IsFirstThinkingContent {
+ if hasThinkingContent {
+ text := "
\n" + thinkingContent.String()
+ chunk := openAICompletionsStreamChunk{
+ ID: id,
+ Object: object,
+ Created: created,
+ Model: model,
+ Choices: make([]openAICompletionsChunkItem, 0, len(chatChunk.Choices)),
+ }
+ for _, choice := range chatChunk.Choices {
+ chunk.Choices = append(chunk.Choices, openAICompletionsChunkItem{
+ Text: text,
+ Index: choice.Index,
+ Logprobs: nil,
+ FinishReason: nil,
+ })
+ }
+ chunks = append(chunks, chunk)
+ sentText.WriteString(text)
+ info.ThinkingContentInfo.IsFirstThinkingContent = false
+ info.ThinkingContentInfo.HasSentThinkingContent = true
+ return chunks, sentText.String()
+ }
+ }
+
+ // Insert closing tag before the first content token after thinking.
+ if info.ChannelSetting.ThinkingToContent &&
+ hasContent &&
+ !info.ThinkingContentInfo.SendLastThinkingContent &&
+ info.ThinkingContentInfo.HasSentThinkingContent {
+
+ text := "\n\n"
+ chunk := openAICompletionsStreamChunk{
+ ID: id,
+ Object: object,
+ Created: created,
+ Model: model,
+ Choices: make([]openAICompletionsChunkItem, 0, len(chatChunk.Choices)),
+ }
+ for _, choice := range chatChunk.Choices {
+ chunk.Choices = append(chunk.Choices, openAICompletionsChunkItem{
+ Text: text,
+ Index: choice.Index,
+ Logprobs: nil,
+ FinishReason: nil,
+ })
+ }
+ chunks = append(chunks, chunk)
+ sentText.WriteString(text)
+ info.ThinkingContentInfo.SendLastThinkingContent = true
+ }
+
+ // Convert the actual chunk.
+ chunk := openAICompletionsStreamChunk{
+ ID: id,
+ Object: object,
+ Created: created,
+ Model: model,
+ Choices: make([]openAICompletionsChunkItem, 0, len(chatChunk.Choices)),
+ }
+
+ for _, choice := range chatChunk.Choices {
+ var text string
+
+ if info.ChannelSetting.ThinkingToContent {
+ if rc := choice.Delta.GetReasoningContent(); rc != "" {
+ text = rc
+ } else {
+ text = choice.Delta.GetContentString()
+ }
+ } else {
+ text = choice.Delta.GetContentString()
+ }
+
+ chunk.Choices = append(chunk.Choices, openAICompletionsChunkItem{
+ Text: text,
+ Index: choice.Index,
+ Logprobs: nil,
+ FinishReason: choice.FinishReason,
+ })
+ sentText.WriteString(text)
+ }
+
+ // Skip the leading assistant-role-only chunk(s) to avoid leaking chat-specific patterns.
+ shouldSend := false
+ for _, choice := range chunk.Choices {
+ if choice.Text != "" || choice.FinishReason != nil {
+ shouldSend = true
+ break
+ }
+ }
+ if shouldSend {
+ chunks = append(chunks, chunk)
+ }
+
+ // Usage-only chunks are not part of legacy completions streaming; don't forward them.
+ // We keep them for billing via `containStreamUsage` in the caller.
+ if !shouldSend {
+ sentText.Reset()
+ }
+ return chunks, sentText.String()
+}
diff --git a/relay/channel/openai/relay-openai.go b/relay/channel/openai/relay-openai.go
index 83125a5f8..7b0c2c169 100644
--- a/relay/channel/openai/relay-openai.go
+++ b/relay/channel/openai/relay-openai.go
@@ -232,7 +232,7 @@ func OpenaiHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Respo
simpleResponse.Model = relaycommon.MaskMappedModelName(c, info, simpleResponse.Model)
if oaiError := simpleResponse.GetOpenAIError(); oaiError != nil && oaiError.Type != "" {
- return nil, types.WithOpenAIError(*oaiError, resp.StatusCode)
+ return nil, types.WithOpenAIError(*oaiError, resp.StatusCode, types.ErrOptionWithUpstreamError())
}
forceFormat := false
diff --git a/relay/channel/openai/relay_responses.go b/relay/channel/openai/relay_responses.go
index 7b7e38b6b..dd3c186a8 100644
--- a/relay/channel/openai/relay_responses.go
+++ b/relay/channel/openai/relay_responses.go
@@ -31,7 +31,7 @@ func OaiResponsesHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http
return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
}
if oaiError := responsesResponse.GetOpenAIError(); oaiError != nil && oaiError.Type != "" {
- return nil, types.WithOpenAIError(*oaiError, resp.StatusCode)
+ return nil, types.WithOpenAIError(*oaiError, resp.StatusCode, types.ErrOptionWithUpstreamError())
}
if responsesResponse.HasImageGenerationCall() {
diff --git a/relay/channel/palm/relay-palm.go b/relay/channel/palm/relay-palm.go
index 786ea4cd2..782cc787b 100644
--- a/relay/channel/palm/relay-palm.go
+++ b/relay/channel/palm/relay-palm.go
@@ -118,7 +118,7 @@ func palmHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Respons
Type: palmResponse.Error.Status,
Param: "",
Code: palmResponse.Error.Code,
- }, resp.StatusCode)
+ }, resp.StatusCode, types.ErrOptionWithUpstreamError())
}
fullTextResponse := responsePaLM2OpenAI(&palmResponse)
usage := service.ResponseText2Usage(c, palmResponse.Candidates[0].Content, info.UpstreamModelName, info.GetEstimatePromptTokens())
diff --git a/relay/channel/tencent/relay-tencent.go b/relay/channel/tencent/relay-tencent.go
index dbe7750e4..aef4f36b9 100644
--- a/relay/channel/tencent/relay-tencent.go
+++ b/relay/channel/tencent/relay-tencent.go
@@ -148,7 +148,7 @@ func tencentHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Resp
return nil, types.WithOpenAIError(types.OpenAIError{
Message: tencentSb.Response.Error.Message,
Code: tencentSb.Response.Error.Code,
- }, resp.StatusCode)
+ }, resp.StatusCode, types.ErrOptionWithUpstreamError())
}
fullTextResponse := responseTencent2OpenAI(&tencentSb.Response)
jsonResponse, err := common.Marshal(fullTextResponse)
diff --git a/relay/channel/zhipu/relay-zhipu.go b/relay/channel/zhipu/relay-zhipu.go
index 964dff082..26b502f8d 100644
--- a/relay/channel/zhipu/relay-zhipu.go
+++ b/relay/channel/zhipu/relay-zhipu.go
@@ -233,7 +233,7 @@ func zhipuHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Respon
return nil, types.WithOpenAIError(types.OpenAIError{
Message: zhipuResponse.Msg,
Code: zhipuResponse.Code,
- }, resp.StatusCode)
+ }, resp.StatusCode, types.ErrOptionWithUpstreamError())
}
fullTextResponse := responseZhipu2OpenAI(&zhipuResponse)
jsonResponse, err := json.Marshal(fullTextResponse)
diff --git a/relay/channel/zhipu_4v/image.go b/relay/channel/zhipu_4v/image.go
index b1fd2c8e3..0f27eea73 100644
--- a/relay/channel/zhipu_4v/image.go
+++ b/relay/channel/zhipu_4v/image.go
@@ -71,7 +71,7 @@ func zhipu4vImageHandler(c *gin.Context, resp *http.Response, info *relaycommon.
Message: zhipuResp.Error.Message,
Type: "zhipu_image_error",
Code: zhipuResp.Error.Code,
- }, resp.StatusCode)
+ }, resp.StatusCode, types.ErrOptionWithUpstreamError())
}
payload := openAIImagePayload{}
diff --git a/service/error.go b/service/error.go
index 8191953cf..589eb1b77 100644
--- a/service/error.go
+++ b/service/error.go
@@ -82,7 +82,7 @@ func ClaudeErrorWrapperLocal(err error, code string, statusCode int) *dto.Claude
}
func RelayErrorHandler(ctx context.Context, resp *http.Response, showBodyWhenFail bool) (newApiErr *types.NewAPIError) {
- newApiErr = types.InitOpenAIError(types.ErrorCodeBadResponseStatusCode, resp.StatusCode)
+ newApiErr = types.InitOpenAIError(types.ErrorCodeBadResponseStatusCode, resp.StatusCode, types.ErrOptionWithUpstreamError())
responseBody, err := io.ReadAll(resp.Body)
if err != nil {
@@ -112,14 +112,14 @@ func RelayErrorHandler(ctx context.Context, resp *http.Response, showBodyWhenFai
// General format error (OpenAI, Anthropic, Gemini, etc.)
oaiError := errResponse.TryToOpenAIError()
if oaiError != nil {
- newApiErr = types.WithOpenAIError(*oaiError, resp.StatusCode)
+ newApiErr = types.WithOpenAIError(*oaiError, resp.StatusCode, types.ErrOptionWithUpstreamError())
if showBodyWhenFail {
newApiErr.Err = buildErrWithBody(newApiErr.Error())
}
return
}
}
- newApiErr = types.NewOpenAIError(errors.New(errResponse.ToMessage()), types.ErrorCodeBadResponseStatusCode, resp.StatusCode)
+ newApiErr = types.NewOpenAIError(errors.New(errResponse.ToMessage()), types.ErrorCodeBadResponseStatusCode, resp.StatusCode, types.ErrOptionWithUpstreamError())
if showBodyWhenFail {
newApiErr.Err = buildErrWithBody(newApiErr.Error())
}
diff --git a/types/error.go b/types/error.go
index dd1749c26..3a6a51080 100644
--- a/types/error.go
+++ b/types/error.go
@@ -95,6 +95,8 @@ type NewAPIError struct {
errorCode ErrorCode
StatusCode int
Metadata json.RawMessage
+ upstream bool
+ upstreamStatus int
}
// Unwrap enables errors.Is / errors.As to work with NewAPIError by exposing the underlying error.
@@ -119,6 +121,23 @@ func (e *NewAPIError) GetErrorType() ErrorType {
return e.errorType
}
+func (e *NewAPIError) IsUpstreamError() bool {
+ if e == nil {
+ return false
+ }
+ return e.upstream
+}
+
+func (e *NewAPIError) UpstreamStatusCode() int {
+ if e == nil {
+ return 0
+ }
+ if e.upstreamStatus != 0 {
+ return e.upstreamStatus
+ }
+ return e.StatusCode
+}
+
func (e *NewAPIError) Error() string {
if e == nil {
return ""
@@ -383,6 +402,15 @@ func ErrOptionWithSkipRetry() NewAPIErrorOptions {
}
}
+func ErrOptionWithUpstreamError() NewAPIErrorOptions {
+ return func(e *NewAPIError) {
+ e.upstream = true
+ if e.upstreamStatus == 0 {
+ e.upstreamStatus = e.StatusCode
+ }
+ }
+}
+
func ErrOptionWithNoRecordErrorLog() NewAPIErrorOptions {
return func(e *NewAPIError) {
e.recordErrorLog = common.GetPointer(false)
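Putting the pieces together: an adaptor tags an upstream failure with the new option, and the controller consults the two accessors to decide whether to mask it. A condensed cross-package sketch (oaiError and resp are assumed to be in scope):

    apiErr := types.WithOpenAIError(oaiError, resp.StatusCode, types.ErrOptionWithUpstreamError())
    if apiErr.IsUpstreamError() && shouldMaskUpstreamStatusCode(apiErr.UpstreamStatusCode()) {
        // controller/relay.go replies with a generic 503 "Overloaded" body
        // instead of forwarding the upstream error to the client
    }
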
diff --git a/web-v2/src/pages/console/TokenListPage.tsx b/web-v2/src/pages/console/TokenListPage.tsx
index 833e30b66..6b696c2b7 100644
--- a/web-v2/src/pages/console/TokenListPage.tsx
+++ b/web-v2/src/pages/console/TokenListPage.tsx
@@ -1,4 +1,4 @@
-import { useEffect, useMemo, useRef, useState } from 'react';
+import { useEffect, useRef, useState } from 'react';
import { useNavigate } from 'react-router-dom';
import { Ban, Check, Copy, Eye, Pencil, RefreshCcw, Trash2 } from 'lucide-react';
import { fetchJson } from '@/api/client';
@@ -7,9 +7,8 @@ import { useStatus } from '@/stores/status/StatusStore';
import { toast } from '@/ui/toast';
import { confirmModal } from '@/ui/confirmModal';
import { copyText } from '@/lib/clipboard';
-import { formatUnixSeconds } from '@/lib/time';
import { formatTokenApiKey, getTokenApiKeyPrefix } from '@/lib/tokenApiKey';
-import { Button, Card, Checkbox, Chip, Input, Label, ListBox, Modal, Select, TextField } from '@/components/ui/heroui';
+import { Button, Card, Chip, Input, Label, ListBox, Modal, Select, TextField } from '@/components/ui/heroui';
import { TableActionButton } from '@/components/ui/TableActionButton';
type TokenStatus = 1 | 2 | 3 | 4;
@@ -80,25 +79,6 @@ function getServerAddress(status: any): string {
return (status?.server_address as string | undefined) || window.location.origin;
}
-function loadCompactModes(): Record<string, boolean> {
- try {
- return JSON.parse(localStorage.getItem('table_compact_modes') || '{}');
- } catch {
- return {};
- }
-}
-
-function setCompactMode(tableKey: string, value: boolean) {
- const modes = loadCompactModes();
- modes[tableKey] = value;
- localStorage.setItem('table_compact_modes', JSON.stringify(modes));
-}
-
-function getCompactMode(tableKey: string): boolean {
- const modes = loadCompactModes();
- return Boolean(modes[tableKey]);
-}
-
function FluentPrefillModal({
open,
models,
@@ -252,13 +232,10 @@ export function TokenListPage() {
const [total, setTotal] = useState(0);
const [keyword, setKeyword] = useState('');
- const [tokenQuery, setTokenQuery] = useState('');
const [searching, setSearching] = useState(false);
const [searchMode, setSearchMode] = useState(false);
- const [selectedIds, setSelectedIds] = useState<number[]>([]);
const [keyModalToken, setKeyModalToken] = useState(null);
- const [compact, setCompact] = useState(() => getCompactMode('tokens'));
const refresh = async (nextPage = page, nextSize = pageSize) => {
setLoading(true);
@@ -270,7 +247,6 @@ export function TokenListPage() {
setTotal(res.data.total || 0);
setPage(res.data.page || nextPage);
setPageSize(res.data.page_size || nextSize);
- setSelectedIds([]);
setSearchMode(false);
} finally {
setLoading(false);
@@ -283,7 +259,7 @@ export function TokenListPage() {
}, []);
const search = async () => {
- if (!keyword.trim() && !tokenQuery.trim()) {
+ if (!keyword.trim()) {
await refresh(1, pageSize);
return;
}
@@ -292,81 +268,17 @@ export function TokenListPage() {
const res = await fetchJson>('/api/token/search', {
params: {
keyword: keyword.trim(),
- token: tokenQuery.trim(),
},
});
setTokens(res.data || []);
setTotal((res.data || []).length);
setPage(1);
setSearchMode(true);
- setSelectedIds([]);
} finally {
setSearching(false);
}
};
- const toggleAll = (checked: boolean) => {
- if (!checked) {
- setSelectedIds([]);
- return;
- }
- setSelectedIds(tokens.map((t) => t.id));
- };
-
- const toggleOne = (id: number, checked: boolean) => {
- setSelectedIds((prev) => {
- if (!checked) return prev.filter((x) => x !== id);
- return prev.includes(id) ? prev : [...prev, id];
- });
- };
-
- const selectedTokens = useMemo(() => {
- const map = new Map(tokens.map((t) => [t.id, t] as const));
- return selectedIds.map((id) => map.get(id)).filter(Boolean) as Token[];
- }, [selectedIds, tokens]);
-
- const batchCopy = async (mode: 'keys' | 'name+key') => {
- if (selectedTokens.length === 0) {
- toast.warning('Select at least one token.');
- return;
- }
- const content =
- mode === 'keys'
- ? selectedTokens.map((t) => formatTokenApiKey(t.key)).join('\n')
- : selectedTokens
- .map((t) => `${t.name} ${formatTokenApiKey(t.key)}`)
- .join('\n');
- const ok = await copyText(content);
- if (ok) toast.success('Copied');
- else toast.error('Copy failed');
- };
-
- const batchDelete = async () => {
- if (selectedTokens.length === 0) {
- toast.warning('Select at least one token.');
- return;
- }
- const ok = await confirmModal(`Delete ${selectedTokens.length} tokens?`, {
- title: 'Delete tokens',
- confirmText: 'Delete',
- cancelText: 'Cancel',
- confirmVariant: 'danger',
- });
- if (!ok) return;
- setLoading(true);
- try {
- const ids = selectedTokens.map((t) => t.id);
- const res = await fetchJson>('/api/token/batch', {
- method: 'POST',
- body: { ids },
- });
- toast.success(`Deleted ${res.data || 0} tokens`);
- await refresh(Math.max(1, page - 1), pageSize);
- } finally {
- setLoading(false);
- }
- };
-
const setStatus = async (token: Token, nextStatus: 1 | 2) => {
setLoading(true);
try {
@@ -459,9 +371,7 @@ export function TokenListPage() {
return;
}
const serverAddress = getServerAddress(status);
- const tokenToUse =
- fluentOverrideKey ||
- (selectedTokens.length === 1 ? selectedTokens[0]?.key : tokens.length > 0 ? tokens[0]?.key : '');
+ const tokenToUse = fluentOverrideKey || (tokens.length > 0 ? tokens[0]?.key : '');
if (!tokenToUse) {
toast.warning('No token available.');
return;
@@ -509,7 +419,7 @@ export function TokenListPage() {
observer.observe(root, { childList: true, subtree: true });
return () => observer.disconnect();
// eslint-disable-next-line react-hooks/exhaustive-deps
- }, [tokens, selectedTokens.length]);
+ }, [tokens]);
return (
@@ -533,15 +443,6 @@ export function TokenListPage() {
-
-
-
@@ -552,10 +453,6 @@ export function TokenListPage() {