diff --git a/controller/relay.go b/controller/relay.go index e033847df..6ead639b1 100644 --- a/controller/relay.go +++ b/controller/relay.go @@ -86,6 +86,23 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) { defer func() { if newAPIError != nil { logger.LogError(c, fmt.Sprintf("relay error: %s", newAPIError.Error())) + if shouldReturnOverloadedForUpstreamError(newAPIError) { + switch relayFormat { + case types.RelayFormatOpenAIRealtime: + helper.WssError(c, ws, overloadedOpenAIError()) + case types.RelayFormatClaude: + c.JSON(http.StatusServiceUnavailable, gin.H{ + "type": "error", + "error": overloadedClaudeError(), + }) + default: + c.JSON(http.StatusServiceUnavailable, gin.H{ + "error": overloadedOpenAIError(), + }) + } + return + } + newAPIError.SetMessage(common.MessageWithRequestId(newAPIError.Error(), requestId)) switch relayFormat { case types.RelayFormatOpenAIRealtime: diff --git a/controller/upstream_error_mask.go b/controller/upstream_error_mask.go new file mode 100644 index 000000000..d47019517 --- /dev/null +++ b/controller/upstream_error_mask.go @@ -0,0 +1,45 @@ +package controller + +import ( + "net/http" + + "github.com/QuantumNous/new-api/types" +) + +const overloadedMessage = "Overloaded" + +func shouldMaskUpstreamStatusCode(statusCode int) bool { + if statusCode >= 500 && statusCode <= 599 { + return true + } + if statusCode >= 400 && statusCode <= 499 { + return statusCode != http.StatusBadRequest && statusCode != http.StatusRequestEntityTooLarge + } + return false +} + +func shouldReturnOverloadedForUpstreamError(err *types.NewAPIError) bool { + if err == nil { + return false + } + if !err.IsUpstreamError() { + return false + } + return shouldMaskUpstreamStatusCode(err.UpstreamStatusCode()) +} + +func overloadedOpenAIError() types.OpenAIError { + return types.OpenAIError{ + Message: overloadedMessage, + Type: "upstream_error", + Param: "", + Code: nil, + } +} + +func overloadedClaudeError() types.ClaudeError { + return types.ClaudeError{ + Type: "upstream_error", + Message: overloadedMessage, + } +} diff --git a/dto/channel_settings.go b/dto/channel_settings.go index e88f2235e..d3e00bd40 100644 --- a/dto/channel_settings.go +++ b/dto/channel_settings.go @@ -1,12 +1,15 @@ package dto type ChannelSettings struct { - ForceFormat bool `json:"force_format,omitempty"` - ThinkingToContent bool `json:"thinking_to_content,omitempty"` - Proxy string `json:"proxy"` - PassThroughBodyEnabled bool `json:"pass_through_body_enabled,omitempty"` - SystemPrompt string `json:"system_prompt,omitempty"` - SystemPromptOverride bool `json:"system_prompt_override,omitempty"` + ForceFormat bool `json:"force_format,omitempty"` + // If enabled, /v1/completions requests will be proxied via /v1/chat/completions + // and responses will be converted back to the legacy Completions-compatible format. + CompletionsViaChatCompletions bool `json:"completions_via_chat_completions,omitempty"` + ThinkingToContent bool `json:"thinking_to_content,omitempty"` + Proxy string `json:"proxy"` + PassThroughBodyEnabled bool `json:"pass_through_body_enabled,omitempty"` + SystemPrompt string `json:"system_prompt,omitempty"` + SystemPromptOverride bool `json:"system_prompt_override,omitempty"` } type VertexKeyType string diff --git a/middleware/llm_endpoint_guard.go b/middleware/llm_endpoint_guard.go new file mode 100644 index 000000000..f1907da6c --- /dev/null +++ b/middleware/llm_endpoint_guard.go @@ -0,0 +1,74 @@ +package middleware + +import ( + "fmt" + "net/http" + + "github.com/QuantumNous/new-api/setting/operation_setting" + "github.com/QuantumNous/new-api/types" + "github.com/gin-gonic/gin" +) + +// LLMEndpointGuard blocks globally disabled LLM-related endpoints. +// +// It returns 404 to make disabled endpoints behave like they do not exist. +func LLMEndpointGuard() gin.HandlerFunc { + return func(c *gin.Context) { + enabled, ok := isCurrentLLMEndpointEnabled(c) + if ok && !enabled { + abortAsRelayNotFound(c) + return + } + c.Next() + } +} + +func isCurrentLLMEndpointEnabled(c *gin.Context) (enabled bool, ok bool) { + fullPath := c.FullPath() + if fullPath == "" { + return true, false + } + + settings := operation_setting.GetLLMEndpointSetting() + + switch fullPath { + case "/v1/completions": + return settings.EnableCompletions, true + case "/v1/chat/completions", "/pg/chat/completions": + return settings.EnableChatCompletions, true + case "/v1/responses": + return settings.EnableResponses, true + case "/v1/messages": + return settings.EnableClaudeMessages, true + case "/v1/embeddings": + return settings.EnableEmbeddings, true + case "/v1/edits", "/v1/images/generations", "/v1/images/edits", "/v1/images/variations": + return settings.EnableImages, true + case "/v1/audio/transcriptions", "/v1/audio/translations", "/v1/audio/speech": + return settings.EnableAudio, true + case "/v1/moderations": + return settings.EnableModerations, true + case "/v1/rerank": + return settings.EnableRerank, true + case "/v1/realtime": + return settings.EnableRealtime, true + case "/v1/engines/:model/embeddings", "/v1/models/*path", "/v1beta/models", "/v1beta/openai/models", "/v1beta/models/*path": + return settings.EnableGemini, true + default: + return true, false + } +} + +func abortAsRelayNotFound(c *gin.Context) { + err := types.OpenAIError{ + Message: fmt.Sprintf("Invalid URL (%s %s)", c.Request.Method, c.Request.URL.Path), + Type: "invalid_request_error", + Param: "", + Code: "", + } + c.JSON(http.StatusNotFound, gin.H{ + "error": err, + }) + c.Abort() +} + diff --git a/middleware/llm_endpoint_guard_test.go b/middleware/llm_endpoint_guard_test.go new file mode 100644 index 000000000..87eda6dcf --- /dev/null +++ b/middleware/llm_endpoint_guard_test.go @@ -0,0 +1,51 @@ +package middleware + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/gin-gonic/gin" +) + +func TestLLMEndpointGuard_Defaults(t *testing.T) { + gin.SetMode(gin.TestMode) + + router := gin.New() + authMiddleware := func(c *gin.Context) { + c.Status(http.StatusUnauthorized) + c.Abort() + } + + v1 := router.Group("/v1") + v1.Use(LLMEndpointGuard()) + v1.Use(authMiddleware) + v1.POST("/completions", func(c *gin.Context) { c.Status(http.StatusOK) }) + v1.POST("/chat/completions", func(c *gin.Context) { c.Status(http.StatusOK) }) + v1.POST("/embeddings", func(c *gin.Context) { c.Status(http.StatusOK) }) + v1.GET("/realtime", func(c *gin.Context) { c.Status(http.StatusOK) }) + + v1betaModels := router.Group("/v1beta/models") + v1betaModels.Use(LLMEndpointGuard()) + v1betaModels.Use(authMiddleware) + v1betaModels.GET("", func(c *gin.Context) { c.Status(http.StatusOK) }) + + assertStatus := func(method, path string, want int) { + request := httptest.NewRequest(method, path, nil) + response := httptest.NewRecorder() + router.ServeHTTP(response, request) + if response.Code != want { + t.Fatalf("%s %s: expected %d, got %d", method, path, want, response.Code) + } + } + + // Defaults: allow completions and chat completions. + assertStatus(http.MethodPost, "/v1/completions", http.StatusUnauthorized) + assertStatus(http.MethodPost, "/v1/chat/completions", http.StatusUnauthorized) + + // Defaults: disable everything else. + assertStatus(http.MethodPost, "/v1/embeddings", http.StatusNotFound) + assertStatus(http.MethodGet, "/v1/realtime", http.StatusNotFound) + assertStatus(http.MethodGet, "/v1beta/models", http.StatusNotFound) +} + diff --git a/relay/channel/ali/image.go b/relay/channel/ali/image.go index cfd9a0fdd..4a74192c9 100644 --- a/relay/channel/ali/image.go +++ b/relay/channel/ali/image.go @@ -315,7 +315,7 @@ func aliImageHandler(a *Adaptor, c *gin.Context, resp *http.Response, info *rela Type: "ali_error", Param: "", Code: aliResponse.Output.Code, - }, resp.StatusCode), nil + }, resp.StatusCode, types.ErrOptionWithUpstreamError()), nil } } diff --git a/relay/channel/ali/rerank.go b/relay/channel/ali/rerank.go index 1323fc830..2cd98211a 100644 --- a/relay/channel/ali/rerank.go +++ b/relay/channel/ali/rerank.go @@ -51,7 +51,7 @@ func RerankHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayI Type: aliResponse.Code, Param: aliResponse.RequestId, Code: aliResponse.Code, - }, resp.StatusCode), nil + }, resp.StatusCode, types.ErrOptionWithUpstreamError()), nil } usage := dto.Usage{ diff --git a/relay/channel/claude/relay-claude.go b/relay/channel/claude/relay-claude.go index fe37a26e1..fa6dcd34c 100644 --- a/relay/channel/claude/relay-claude.go +++ b/relay/channel/claude/relay-claude.go @@ -642,7 +642,7 @@ func HandleStreamResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud return types.NewError(err, types.ErrorCodeBadResponseBody) } if claudeError := claudeResponse.GetClaudeError(); claudeError != nil && claudeError.Type != "" { - return types.WithClaudeError(*claudeError, http.StatusInternalServerError) + return types.WithClaudeError(*claudeError, http.StatusInternalServerError, types.ErrOptionWithUpstreamError()) } if info.RelayFormat == types.RelayFormatClaude { FormatClaudeResponseInfo(requestMode, &claudeResponse, nil, claudeInfo) @@ -739,7 +739,7 @@ func HandleClaudeResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud return types.NewError(err, types.ErrorCodeBadResponseBody) } if claudeError := claudeResponse.GetClaudeError(); claudeError != nil && claudeError.Type != "" { - return types.WithClaudeError(*claudeError, http.StatusInternalServerError) + return types.WithClaudeError(*claudeError, http.StatusInternalServerError, types.ErrOptionWithUpstreamError()) } if requestMode == RequestModeCompletion { claudeInfo.Usage = service.ResponseText2Usage(c, claudeResponse.Completion, info.UpstreamModelName, info.GetEstimatePromptTokens()) diff --git a/relay/channel/jimeng/image.go b/relay/channel/jimeng/image.go index e422e061d..0f16bde13 100644 --- a/relay/channel/jimeng/image.go +++ b/relay/channel/jimeng/image.go @@ -69,7 +69,7 @@ func jimengImageHandler(c *gin.Context, resp *http.Response, info *relaycommon.R Type: "jimeng_error", Param: "", Code: fmt.Sprintf("%d", jimengResponse.Code), - }, resp.StatusCode) + }, resp.StatusCode, types.ErrOptionWithUpstreamError()) } // Convert Jimeng response to OpenAI format diff --git a/relay/channel/openai/adaptor.go b/relay/channel/openai/adaptor.go index 52f5d731b..9e7bba42f 100644 --- a/relay/channel/openai/adaptor.go +++ b/relay/channel/openai/adaptor.go @@ -171,6 +171,13 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) { url = strings.Replace(url, "{model}", info.UpstreamModelName, -1) return url, nil default: + if info.ChannelType == constant.ChannelTypeOpenAI && + info.RelayMode == relayconstant.RelayModeCompletions && + info.ChannelSetting.CompletionsViaChatCompletions && + !model_setting.GetGlobalSettings().PassThroughRequestEnabled && + !info.ChannelSetting.PassThroughBodyEnabled { + return relaycommon.GetFullRequestURL(info.ChannelBaseUrl, "/v1/chat/completions", info.ChannelType), nil + } if info.RelayFormat == types.RelayFormatClaude || info.RelayFormat == types.RelayFormatGemini { return fmt.Sprintf("%s/v1/chat/completions", info.ChannelBaseUrl), nil } @@ -217,6 +224,21 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn if request == nil { return nil, errors.New("request is nil") } + if info.ChannelType == constant.ChannelTypeOpenAI && + info.RelayMode == relayconstant.RelayModeCompletions && + info.ChannelSetting.CompletionsViaChatCompletions { + promptText := completionsPromptToString(request.Prompt) + request.Messages = []dto.Message{ + { + Role: "assistant", + Content: promptText, + }, + } + // Remove legacy fields to keep the upstream payload chat-completions compatible. + request.Prompt = nil + request.Prefix = nil + request.Suffix = nil + } if info.ChannelType != constant.ChannelTypeOpenAI && info.ChannelType != constant.ChannelTypeAzure { request.StreamOptions = nil } @@ -606,10 +628,18 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom usage, err = OaiResponsesHandler(c, info, resp) } default: - if info.IsStream { - usage, err = OaiStreamHandler(c, info, resp) + if shouldUseChatCompletionsForCompletions(info) { + if info.IsStream { + usage, err = OaiCompletionsViaChatStreamHandler(c, info, resp) + } else { + usage, err = OpenaiCompletionsViaChatHandler(c, info, resp) + } } else { - usage, err = OpenaiHandler(c, info, resp) + if info.IsStream { + usage, err = OaiStreamHandler(c, info, resp) + } else { + usage, err = OpenaiHandler(c, info, resp) + } } } return diff --git a/relay/channel/openai/chat_via_responses.go b/relay/channel/openai/chat_via_responses.go index 1a2d1883e..9c916601b 100644 --- a/relay/channel/openai/chat_via_responses.go +++ b/relay/channel/openai/chat_via_responses.go @@ -36,7 +36,7 @@ func OaiResponsesToChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp } if oaiError := responsesResp.GetOpenAIError(); oaiError != nil && oaiError.Type != "" { - return nil, types.WithOpenAIError(*oaiError, resp.StatusCode) + return nil, types.WithOpenAIError(*oaiError, resp.StatusCode, types.ErrOptionWithUpstreamError()) } responsesResp.Model = relaycommon.MaskMappedModelName(c, info, responsesResp.Model) @@ -327,7 +327,7 @@ func OaiResponsesToChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo case "response.error", "response.failed": if streamResp.Response != nil { if oaiErr := streamResp.Response.GetOpenAIError(); oaiErr != nil && oaiErr.Type != "" { - streamErr = types.WithOpenAIError(*oaiErr, http.StatusInternalServerError) + streamErr = types.WithOpenAIError(*oaiErr, http.StatusInternalServerError, types.ErrOptionWithUpstreamError()) return false } } diff --git a/relay/channel/openai/completions_via_chat.go b/relay/channel/openai/completions_via_chat.go new file mode 100644 index 000000000..6b529c63f --- /dev/null +++ b/relay/channel/openai/completions_via_chat.go @@ -0,0 +1,68 @@ +package openai + +import ( + "fmt" + "strings" + + "github.com/QuantumNous/new-api/common" + "github.com/QuantumNous/new-api/constant" + relaycommon "github.com/QuantumNous/new-api/relay/common" + relayconstant "github.com/QuantumNous/new-api/relay/constant" + "github.com/QuantumNous/new-api/setting/model_setting" +) + +func shouldUseChatCompletionsForCompletions(info *relaycommon.RelayInfo) bool { + if info == nil { + return false + } + if info.ChannelType != constant.ChannelTypeOpenAI { + return false + } + if info.RelayMode != relayconstant.RelayModeCompletions { + return false + } + if !info.ChannelSetting.CompletionsViaChatCompletions { + return false + } + // This feature needs request-body conversion, so it cannot be used when passthrough is enabled. + if model_setting.GetGlobalSettings().PassThroughRequestEnabled || info.ChannelSetting.PassThroughBodyEnabled { + return false + } + return true +} + +func completionsCompatibleID(upstreamID string) string { + if upstreamID == "" { + return "" + } + if strings.HasPrefix(upstreamID, "chatcmpl-") { + return "cmpl-" + strings.TrimPrefix(upstreamID, "chatcmpl-") + } + return upstreamID +} + +func completionsPromptToString(prompt any) string { + if prompt == nil { + return "" + } + + switch v := prompt.(type) { + case string: + return v + case []any: + parts := make([]string, 0, len(v)) + for _, item := range v { + if item == nil { + continue + } + if s, ok := item.(string); ok { + parts = append(parts, s) + continue + } + parts = append(parts, common.Interface2String(item)) + } + return strings.Join(parts, "\n") + default: + return fmt.Sprintf("%v", prompt) + } +} diff --git a/relay/channel/openai/completions_via_chat_response.go b/relay/channel/openai/completions_via_chat_response.go new file mode 100644 index 000000000..0062ec3d6 --- /dev/null +++ b/relay/channel/openai/completions_via_chat_response.go @@ -0,0 +1,351 @@ +package openai + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "strconv" + "strings" + + "github.com/QuantumNous/new-api/common" + "github.com/QuantumNous/new-api/dto" + "github.com/QuantumNous/new-api/logger" + relaycommon "github.com/QuantumNous/new-api/relay/common" + "github.com/QuantumNous/new-api/relay/helper" + "github.com/QuantumNous/new-api/service" + "github.com/QuantumNous/new-api/types" + + "github.com/gin-gonic/gin" +) + +type openAICompletionsResponse struct { + ID string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + Model string `json:"model"` + Choices []openAICompletionsChoice `json:"choices"` + Usage dto.Usage `json:"usage"` + Error any `json:"error,omitempty"` + Extra map[string]json.RawMessage `json:"-"` +} + +type openAICompletionsChoice struct { + Text string `json:"text"` + Index int `json:"index"` + Logprobs any `json:"logprobs"` + FinishReason string `json:"finish_reason"` +} + +type openAICompletionsStreamChunk struct { + ID string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + Model string `json:"model"` + Choices []openAICompletionsChunkItem `json:"choices"` +} + +type openAICompletionsChunkItem struct { + Text string `json:"text"` + Index int `json:"index"` + Logprobs any `json:"logprobs"` + FinishReason *string `json:"finish_reason"` +} + +func parseCreatedUnix(created any) int64 { + switch v := created.(type) { + case int64: + return v + case int: + return int64(v) + case float64: + return int64(v) + case json.Number: + if i, err := v.Int64(); err == nil { + return i + } + case string: + if i, err := strconv.ParseInt(v, 10, 64); err == nil { + return i + } + } + return 0 +} + +func messageToCompletionText(message dto.Message, thinkingToContent bool) string { + content := message.StringContent() + if !thinkingToContent { + return content + } + + reasoning := message.ReasoningContent + if reasoning == "" { + reasoning = message.Reasoning + } + if reasoning == "" { + return content + } + + if content == "" { + return "\n" + reasoning + "\n\n" + } + return "\n" + reasoning + "\n\n" + content +} + +func OpenaiCompletionsViaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) { + defer service.CloseResponseBodyGracefully(resp) + + responseBody, err := io.ReadAll(resp.Body) + if err != nil { + return nil, types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError) + } + if common.DebugEnabled { + println("upstream response body:", string(responseBody)) + } + + var chatResp dto.OpenAITextResponse + if err := common.Unmarshal(responseBody, &chatResp); err != nil { + return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) + } + + if oaiError := chatResp.GetOpenAIError(); oaiError != nil && oaiError.Type != "" { + return nil, types.WithOpenAIError(*oaiError, resp.StatusCode, types.ErrOptionWithUpstreamError()) + } + + usage := chatResp.Usage + if usage.PromptTokens == 0 { + completionTokens := usage.CompletionTokens + if completionTokens == 0 { + for _, choice := range chatResp.Choices { + text := messageToCompletionText(choice.Message, info.ChannelSetting.ThinkingToContent) + completionTokens += service.CountTextToken(text, info.UpstreamModelName) + } + } + usage = dto.Usage{ + PromptTokens: info.GetEstimatePromptTokens(), + CompletionTokens: completionTokens, + TotalTokens: info.GetEstimatePromptTokens() + completionTokens, + } + } + + applyUsagePostProcessing(info, &usage, responseBody) + + completionsResp := openAICompletionsResponse{ + ID: completionsCompatibleID(chatResp.Id), + Object: "text_completion", + Created: parseCreatedUnix(chatResp.Created), + Model: relaycommon.MaskMappedModelName(c, info, chatResp.Model), + Choices: make([]openAICompletionsChoice, 0, len(chatResp.Choices)), + Usage: usage, + } + + for _, choice := range chatResp.Choices { + completionsResp.Choices = append(completionsResp.Choices, openAICompletionsChoice{ + Text: messageToCompletionText(choice.Message, info.ChannelSetting.ThinkingToContent), + Index: choice.Index, + Logprobs: nil, + FinishReason: choice.FinishReason, + }) + } + + responseBody, err = common.Marshal(completionsResp) + if err != nil { + return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) + } + + service.IOCopyBytesGracefully(c, resp, responseBody) + return &usage, nil +} + +func OaiCompletionsViaChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) { + if resp == nil || resp.Body == nil { + logger.LogError(c, "invalid response or response body") + return nil, types.NewOpenAIError(fmt.Errorf("invalid response"), types.ErrorCodeBadResponse, http.StatusInternalServerError) + } + + defer service.CloseResponseBodyGracefully(resp) + + var ( + lastStreamData string + containStreamUsage bool + usage = &dto.Usage{} + responseTextBuilder strings.Builder + ) + + helper.StreamScannerHandler(c, resp, info, func(data string) bool { + if data == "" { + return true + } + lastStreamData = data + + var streamResp dto.ChatCompletionsStreamResponse + if err := common.Unmarshal(common.StringToByteSlice(data), &streamResp); err != nil { + logger.LogError(c, "failed to unmarshal chat completion chunk: "+err.Error()) + return false + } + + if service.ValidUsage(streamResp.Usage) { + containStreamUsage = true + usage = streamResp.Usage + } + + convertedChunks, chunkText := chatChunkToCompletionsChunks(c, info, streamResp) + if chunkText != "" { + responseTextBuilder.WriteString(chunkText) + } + for _, chunk := range convertedChunks { + info.SendResponseCount++ + if err := helper.ObjectData(c, chunk); err != nil { + logger.LogError(c, "failed to write completion chunk: "+err.Error()) + return false + } + } + return true + }) + + if !containStreamUsage { + usage = service.ResponseText2Usage( + c, + responseTextBuilder.String(), + info.UpstreamModelName, + info.GetEstimatePromptTokens(), + ) + } + + applyUsagePostProcessing(info, usage, common.StringToByteSlice(lastStreamData)) + helper.Done(c) + return usage, nil +} + +func chatChunkToCompletionsChunks(c *gin.Context, info *relaycommon.RelayInfo, chatChunk dto.ChatCompletionsStreamResponse) ([]openAICompletionsStreamChunk, string) { + if info == nil { + return nil, "" + } + + created := chatChunk.Created + model := relaycommon.MaskMappedModelName(c, info, chatChunk.Model) + id := completionsCompatibleID(chatChunk.Id) + + object := "text_completion" + + // When thinking_to_content is enabled, emulate the existing tag-injection behavior + // from sendStreamData, but output as legacy completions chunks. + hasThinkingContent := false + hasContent := false + var thinkingContent strings.Builder + for _, choice := range chatChunk.Choices { + if rc := choice.Delta.GetReasoningContent(); len(rc) > 0 { + hasThinkingContent = true + thinkingContent.WriteString(rc) + } + if cc := choice.Delta.GetContentString(); len(cc) > 0 { + hasContent = true + } + } + + chunks := make([]openAICompletionsStreamChunk, 0, 2) + var sentText strings.Builder + + if info.ChannelSetting.ThinkingToContent && info.ThinkingContentInfo.IsFirstThinkingContent { + if hasThinkingContent { + text := "\n" + thinkingContent.String() + chunk := openAICompletionsStreamChunk{ + ID: id, + Object: object, + Created: created, + Model: model, + Choices: make([]openAICompletionsChunkItem, 0, len(chatChunk.Choices)), + } + for _, choice := range chatChunk.Choices { + chunk.Choices = append(chunk.Choices, openAICompletionsChunkItem{ + Text: text, + Index: choice.Index, + Logprobs: nil, + FinishReason: nil, + }) + } + chunks = append(chunks, chunk) + sentText.WriteString(text) + info.ThinkingContentInfo.IsFirstThinkingContent = false + info.ThinkingContentInfo.HasSentThinkingContent = true + return chunks, sentText.String() + } + } + + // Insert closing tag before the first content token after thinking. + if info.ChannelSetting.ThinkingToContent && + hasContent && + !info.ThinkingContentInfo.SendLastThinkingContent && + info.ThinkingContentInfo.HasSentThinkingContent { + + text := "\n\n" + chunk := openAICompletionsStreamChunk{ + ID: id, + Object: object, + Created: created, + Model: model, + Choices: make([]openAICompletionsChunkItem, 0, len(chatChunk.Choices)), + } + for _, choice := range chatChunk.Choices { + chunk.Choices = append(chunk.Choices, openAICompletionsChunkItem{ + Text: text, + Index: choice.Index, + Logprobs: nil, + FinishReason: nil, + }) + } + chunks = append(chunks, chunk) + sentText.WriteString(text) + info.ThinkingContentInfo.SendLastThinkingContent = true + } + + // Convert the actual chunk. + chunk := openAICompletionsStreamChunk{ + ID: id, + Object: object, + Created: created, + Model: model, + Choices: make([]openAICompletionsChunkItem, 0, len(chatChunk.Choices)), + } + + for _, choice := range chatChunk.Choices { + var text string + + if info.ChannelSetting.ThinkingToContent { + if rc := choice.Delta.GetReasoningContent(); rc != "" { + text = rc + } else { + text = choice.Delta.GetContentString() + } + } else { + text = choice.Delta.GetContentString() + } + + chunk.Choices = append(chunk.Choices, openAICompletionsChunkItem{ + Text: text, + Index: choice.Index, + Logprobs: nil, + FinishReason: choice.FinishReason, + }) + sentText.WriteString(text) + } + + // Skip the leading assistant-role-only chunk(s) to avoid leaking chat-specific patterns. + shouldSend := false + for _, choice := range chunk.Choices { + if choice.Text != "" || choice.FinishReason != nil { + shouldSend = true + break + } + } + if shouldSend { + chunks = append(chunks, chunk) + } + + // Usage-only chunks are not part of legacy completions streaming; don't forward them. + // We keep them for billing via `containStreamUsage` in the caller. + if !shouldSend { + sentText.Reset() + } + return chunks, sentText.String() +} diff --git a/relay/channel/openai/relay-openai.go b/relay/channel/openai/relay-openai.go index 83125a5f8..7b0c2c169 100644 --- a/relay/channel/openai/relay-openai.go +++ b/relay/channel/openai/relay-openai.go @@ -232,7 +232,7 @@ func OpenaiHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Respo simpleResponse.Model = relaycommon.MaskMappedModelName(c, info, simpleResponse.Model) if oaiError := simpleResponse.GetOpenAIError(); oaiError != nil && oaiError.Type != "" { - return nil, types.WithOpenAIError(*oaiError, resp.StatusCode) + return nil, types.WithOpenAIError(*oaiError, resp.StatusCode, types.ErrOptionWithUpstreamError()) } forceFormat := false diff --git a/relay/channel/openai/relay_responses.go b/relay/channel/openai/relay_responses.go index 7b7e38b6b..dd3c186a8 100644 --- a/relay/channel/openai/relay_responses.go +++ b/relay/channel/openai/relay_responses.go @@ -31,7 +31,7 @@ func OaiResponsesHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) } if oaiError := responsesResponse.GetOpenAIError(); oaiError != nil && oaiError.Type != "" { - return nil, types.WithOpenAIError(*oaiError, resp.StatusCode) + return nil, types.WithOpenAIError(*oaiError, resp.StatusCode, types.ErrOptionWithUpstreamError()) } if responsesResponse.HasImageGenerationCall() { diff --git a/relay/channel/palm/relay-palm.go b/relay/channel/palm/relay-palm.go index 786ea4cd2..782cc787b 100644 --- a/relay/channel/palm/relay-palm.go +++ b/relay/channel/palm/relay-palm.go @@ -118,7 +118,7 @@ func palmHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Respons Type: palmResponse.Error.Status, Param: "", Code: palmResponse.Error.Code, - }, resp.StatusCode) + }, resp.StatusCode, types.ErrOptionWithUpstreamError()) } fullTextResponse := responsePaLM2OpenAI(&palmResponse) usage := service.ResponseText2Usage(c, palmResponse.Candidates[0].Content, info.UpstreamModelName, info.GetEstimatePromptTokens()) diff --git a/relay/channel/tencent/relay-tencent.go b/relay/channel/tencent/relay-tencent.go index dbe7750e4..aef4f36b9 100644 --- a/relay/channel/tencent/relay-tencent.go +++ b/relay/channel/tencent/relay-tencent.go @@ -148,7 +148,7 @@ func tencentHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Resp return nil, types.WithOpenAIError(types.OpenAIError{ Message: tencentSb.Response.Error.Message, Code: tencentSb.Response.Error.Code, - }, resp.StatusCode) + }, resp.StatusCode, types.ErrOptionWithUpstreamError()) } fullTextResponse := responseTencent2OpenAI(&tencentSb.Response) jsonResponse, err := common.Marshal(fullTextResponse) diff --git a/relay/channel/zhipu/relay-zhipu.go b/relay/channel/zhipu/relay-zhipu.go index 964dff082..26b502f8d 100644 --- a/relay/channel/zhipu/relay-zhipu.go +++ b/relay/channel/zhipu/relay-zhipu.go @@ -233,7 +233,7 @@ func zhipuHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Respon return nil, types.WithOpenAIError(types.OpenAIError{ Message: zhipuResponse.Msg, Code: zhipuResponse.Code, - }, resp.StatusCode) + }, resp.StatusCode, types.ErrOptionWithUpstreamError()) } fullTextResponse := responseZhipu2OpenAI(&zhipuResponse) jsonResponse, err := json.Marshal(fullTextResponse) diff --git a/relay/channel/zhipu_4v/image.go b/relay/channel/zhipu_4v/image.go index b1fd2c8e3..0f27eea73 100644 --- a/relay/channel/zhipu_4v/image.go +++ b/relay/channel/zhipu_4v/image.go @@ -71,7 +71,7 @@ func zhipu4vImageHandler(c *gin.Context, resp *http.Response, info *relaycommon. Message: zhipuResp.Error.Message, Type: "zhipu_image_error", Code: zhipuResp.Error.Code, - }, resp.StatusCode) + }, resp.StatusCode, types.ErrOptionWithUpstreamError()) } payload := openAIImagePayload{} diff --git a/router/relay-router.go b/router/relay-router.go index b5ac16c64..23c47d583 100644 --- a/router/relay-router.go +++ b/router/relay-router.go @@ -43,6 +43,7 @@ func SetRelayRouter(router *gin.Engine) { } geminiRouter := router.Group("/v1beta/models") + geminiRouter.Use(middleware.LLMEndpointGuard()) geminiRouter.Use(middleware.TokenAuth()) { geminiRouter.GET("", func(c *gin.Context) { @@ -51,6 +52,7 @@ func SetRelayRouter(router *gin.Engine) { } geminiCompatibleRouter := router.Group("/v1beta/openai/models") + geminiCompatibleRouter.Use(middleware.LLMEndpointGuard()) geminiCompatibleRouter.Use(middleware.TokenAuth()) { geminiCompatibleRouter.GET("", func(c *gin.Context) { @@ -59,11 +61,13 @@ func SetRelayRouter(router *gin.Engine) { } playgroundRouter := router.Group("/pg") + playgroundRouter.Use(middleware.LLMEndpointGuard()) playgroundRouter.Use(middleware.UserAuth(), middleware.Distribute()) { playgroundRouter.POST("/chat/completions", controller.Playground) } relayV1Router := router.Group("/v1") + relayV1Router.Use(middleware.LLMEndpointGuard()) relayV1Router.Use(middleware.TokenAuth()) relayV1Router.Use(middleware.ModelRequestRateLimit()) { @@ -173,6 +177,7 @@ func SetRelayRouter(router *gin.Engine) { } relayGeminiRouter := router.Group("/v1beta") + relayGeminiRouter.Use(middleware.LLMEndpointGuard()) relayGeminiRouter.Use(middleware.TokenAuth()) relayGeminiRouter.Use(middleware.ModelRequestRateLimit()) relayGeminiRouter.Use(middleware.Distribute()) diff --git a/service/error.go b/service/error.go index 8191953cf..589eb1b77 100644 --- a/service/error.go +++ b/service/error.go @@ -82,7 +82,7 @@ func ClaudeErrorWrapperLocal(err error, code string, statusCode int) *dto.Claude } func RelayErrorHandler(ctx context.Context, resp *http.Response, showBodyWhenFail bool) (newApiErr *types.NewAPIError) { - newApiErr = types.InitOpenAIError(types.ErrorCodeBadResponseStatusCode, resp.StatusCode) + newApiErr = types.InitOpenAIError(types.ErrorCodeBadResponseStatusCode, resp.StatusCode, types.ErrOptionWithUpstreamError()) responseBody, err := io.ReadAll(resp.Body) if err != nil { @@ -112,14 +112,14 @@ func RelayErrorHandler(ctx context.Context, resp *http.Response, showBodyWhenFai // General format error (OpenAI, Anthropic, Gemini, etc.) oaiError := errResponse.TryToOpenAIError() if oaiError != nil { - newApiErr = types.WithOpenAIError(*oaiError, resp.StatusCode) + newApiErr = types.WithOpenAIError(*oaiError, resp.StatusCode, types.ErrOptionWithUpstreamError()) if showBodyWhenFail { newApiErr.Err = buildErrWithBody(newApiErr.Error()) } return } } - newApiErr = types.NewOpenAIError(errors.New(errResponse.ToMessage()), types.ErrorCodeBadResponseStatusCode, resp.StatusCode) + newApiErr = types.NewOpenAIError(errors.New(errResponse.ToMessage()), types.ErrorCodeBadResponseStatusCode, resp.StatusCode, types.ErrOptionWithUpstreamError()) if showBodyWhenFail { newApiErr.Err = buildErrWithBody(newApiErr.Error()) } diff --git a/setting/operation_setting/llm_endpoint_setting.go b/setting/operation_setting/llm_endpoint_setting.go new file mode 100644 index 000000000..8152d39fa --- /dev/null +++ b/setting/operation_setting/llm_endpoint_setting.go @@ -0,0 +1,45 @@ +package operation_setting + +import "github.com/QuantumNous/new-api/setting/config" + +// LLMEndpointSetting controls which LLM-related API endpoints are enabled globally. +// +// Disabled endpoints should behave like the endpoint does not exist (HTTP 404). +// +// Default: enable completions and chat completions, disable everything else. +type LLMEndpointSetting struct { + EnableCompletions bool `json:"enable_completions"` + EnableChatCompletions bool `json:"enable_chat_completions"` + EnableResponses bool `json:"enable_responses"` + EnableClaudeMessages bool `json:"enable_claude_messages"` + EnableEmbeddings bool `json:"enable_embeddings"` + EnableImages bool `json:"enable_images"` + EnableAudio bool `json:"enable_audio"` + EnableModerations bool `json:"enable_moderations"` + EnableRerank bool `json:"enable_rerank"` + EnableRealtime bool `json:"enable_realtime"` + EnableGemini bool `json:"enable_gemini"` +} + +var llmEndpointSetting = LLMEndpointSetting{ + EnableCompletions: true, + EnableChatCompletions: true, + EnableResponses: false, + EnableClaudeMessages: false, + EnableEmbeddings: false, + EnableImages: false, + EnableAudio: false, + EnableModerations: false, + EnableRerank: false, + EnableRealtime: false, + EnableGemini: false, +} + +func init() { + config.GlobalConfig.Register("llm_endpoint_setting", &llmEndpointSetting) +} + +func GetLLMEndpointSetting() *LLMEndpointSetting { + return &llmEndpointSetting +} + diff --git a/types/error.go b/types/error.go index dd1749c26..3a6a51080 100644 --- a/types/error.go +++ b/types/error.go @@ -95,6 +95,8 @@ type NewAPIError struct { errorCode ErrorCode StatusCode int Metadata json.RawMessage + upstream bool + upstreamStatus int } // Unwrap enables errors.Is / errors.As to work with NewAPIError by exposing the underlying error. @@ -119,6 +121,23 @@ func (e *NewAPIError) GetErrorType() ErrorType { return e.errorType } +func (e *NewAPIError) IsUpstreamError() bool { + if e == nil { + return false + } + return e.upstream +} + +func (e *NewAPIError) UpstreamStatusCode() int { + if e == nil { + return 0 + } + if e.upstreamStatus != 0 { + return e.upstreamStatus + } + return e.StatusCode +} + func (e *NewAPIError) Error() string { if e == nil { return "" @@ -383,6 +402,15 @@ func ErrOptionWithSkipRetry() NewAPIErrorOptions { } } +func ErrOptionWithUpstreamError() NewAPIErrorOptions { + return func(e *NewAPIError) { + e.upstream = true + if e.upstreamStatus == 0 { + e.upstreamStatus = e.StatusCode + } + } +} + func ErrOptionWithNoRecordErrorLog() NewAPIErrorOptions { return func(e *NewAPIError) { e.recordErrorLog = common.GetPointer(false) diff --git a/web-v2/src/pages/console/TokenListPage.tsx b/web-v2/src/pages/console/TokenListPage.tsx index 833e30b66..6b696c2b7 100644 --- a/web-v2/src/pages/console/TokenListPage.tsx +++ b/web-v2/src/pages/console/TokenListPage.tsx @@ -1,4 +1,4 @@ -import { useEffect, useMemo, useRef, useState } from 'react'; +import { useEffect, useRef, useState } from 'react'; import { useNavigate } from 'react-router-dom'; import { Ban, Check, Copy, Eye, Pencil, RefreshCcw, Trash2 } from 'lucide-react'; import { fetchJson } from '@/api/client'; @@ -7,9 +7,8 @@ import { useStatus } from '@/stores/status/StatusStore'; import { toast } from '@/ui/toast'; import { confirmModal } from '@/ui/confirmModal'; import { copyText } from '@/lib/clipboard'; -import { formatUnixSeconds } from '@/lib/time'; import { formatTokenApiKey, getTokenApiKeyPrefix } from '@/lib/tokenApiKey'; -import { Button, Card, Checkbox, Chip, Input, Label, ListBox, Modal, Select, TextField } from '@/components/ui/heroui'; +import { Button, Card, Chip, Input, Label, ListBox, Modal, Select, TextField } from '@/components/ui/heroui'; import { TableActionButton } from '@/components/ui/TableActionButton'; type TokenStatus = 1 | 2 | 3 | 4; @@ -80,25 +79,6 @@ function getServerAddress(status: any): string { return (status?.server_address as string | undefined) || window.location.origin; } -function loadCompactModes(): Record { - try { - return JSON.parse(localStorage.getItem('table_compact_modes') || '{}'); - } catch { - return {}; - } -} - -function setCompactMode(tableKey: string, value: boolean) { - const modes = loadCompactModes(); - modes[tableKey] = value; - localStorage.setItem('table_compact_modes', JSON.stringify(modes)); -} - -function getCompactMode(tableKey: string): boolean { - const modes = loadCompactModes(); - return Boolean(modes[tableKey]); -} - function FluentPrefillModal({ open, models, @@ -252,13 +232,10 @@ export function TokenListPage() { const [total, setTotal] = useState(0); const [keyword, setKeyword] = useState(''); - const [tokenQuery, setTokenQuery] = useState(''); const [searching, setSearching] = useState(false); const [searchMode, setSearchMode] = useState(false); - const [selectedIds, setSelectedIds] = useState([]); const [keyModalToken, setKeyModalToken] = useState(null); - const [compact, setCompact] = useState(() => getCompactMode('tokens')); const refresh = async (nextPage = page, nextSize = pageSize) => { setLoading(true); @@ -270,7 +247,6 @@ export function TokenListPage() { setTotal(res.data.total || 0); setPage(res.data.page || nextPage); setPageSize(res.data.page_size || nextSize); - setSelectedIds([]); setSearchMode(false); } finally { setLoading(false); @@ -283,7 +259,7 @@ export function TokenListPage() { }, []); const search = async () => { - if (!keyword.trim() && !tokenQuery.trim()) { + if (!keyword.trim()) { await refresh(1, pageSize); return; } @@ -292,81 +268,17 @@ export function TokenListPage() { const res = await fetchJson>('/api/token/search', { params: { keyword: keyword.trim(), - token: tokenQuery.trim(), }, }); setTokens(res.data || []); setTotal((res.data || []).length); setPage(1); setSearchMode(true); - setSelectedIds([]); } finally { setSearching(false); } }; - const toggleAll = (checked: boolean) => { - if (!checked) { - setSelectedIds([]); - return; - } - setSelectedIds(tokens.map((t) => t.id)); - }; - - const toggleOne = (id: number, checked: boolean) => { - setSelectedIds((prev) => { - if (!checked) return prev.filter((x) => x !== id); - return prev.includes(id) ? prev : [...prev, id]; - }); - }; - - const selectedTokens = useMemo(() => { - const map = new Map(tokens.map((t) => [t.id, t] as const)); - return selectedIds.map((id) => map.get(id)).filter(Boolean) as Token[]; - }, [selectedIds, tokens]); - - const batchCopy = async (mode: 'keys' | 'name+key') => { - if (selectedTokens.length === 0) { - toast.warning('Select at least one token.'); - return; - } - const content = - mode === 'keys' - ? selectedTokens.map((t) => formatTokenApiKey(t.key)).join('\n') - : selectedTokens - .map((t) => `${t.name} ${formatTokenApiKey(t.key)}`) - .join('\n'); - const ok = await copyText(content); - if (ok) toast.success('Copied'); - else toast.error('Copy failed'); - }; - - const batchDelete = async () => { - if (selectedTokens.length === 0) { - toast.warning('Select at least one token.'); - return; - } - const ok = await confirmModal(`Delete ${selectedTokens.length} tokens?`, { - title: 'Delete tokens', - confirmText: 'Delete', - cancelText: 'Cancel', - confirmVariant: 'danger', - }); - if (!ok) return; - setLoading(true); - try { - const ids = selectedTokens.map((t) => t.id); - const res = await fetchJson>('/api/token/batch', { - method: 'POST', - body: { ids }, - }); - toast.success(`Deleted ${res.data || 0} tokens`); - await refresh(Math.max(1, page - 1), pageSize); - } finally { - setLoading(false); - } - }; - const setStatus = async (token: Token, nextStatus: 1 | 2) => { setLoading(true); try { @@ -459,9 +371,7 @@ export function TokenListPage() { return; } const serverAddress = getServerAddress(status); - const tokenToUse = - fluentOverrideKey || - (selectedTokens.length === 1 ? selectedTokens[0]?.key : tokens.length > 0 ? tokens[0]?.key : ''); + const tokenToUse = fluentOverrideKey || (tokens.length > 0 ? tokens[0]?.key : ''); if (!tokenToUse) { toast.warning('No token available.'); return; @@ -509,7 +419,7 @@ export function TokenListPage() { observer.observe(root, { childList: true, subtree: true }); return () => observer.disconnect(); // eslint-disable-next-line react-hooks/exhaustive-deps - }, [tokens, selectedTokens.length]); + }, [tokens]); return (
@@ -533,15 +443,6 @@ export function TokenListPage() {
- - -
@@ -552,10 +453,6 @@ export function TokenListPage() { - - - -
- - { - setCompact(selected); - setCompactMode('tokens', selected); - }} - > - - - - - - - @@ -595,21 +475,8 @@ export function TokenListPage() { - - {!compact ? ( - <> - - - - ) : null} @@ -619,25 +486,12 @@ export function TokenListPage() { const muted = token.status === 2; return ( - - {!compact ? ( - <> - - - - ) : null}
- 0 && selectedIds.length === tokens.length} - onChange={(e) => toggleAll(e.target.checked)} - /> - Name StatusUsedCreatedKey Actions
- toggleOne(token.id, e.target.checked)} - /> - {token.name || '(unnamed)'} {tokenStatusLabel(token.status)} {token.used_quota}{formatUnixSeconds(token.created_time)}
diff --git a/web-v2/src/pages/console/UsageLogsPage.tsx b/web-v2/src/pages/console/UsageLogsPage.tsx index 9bd52db3e..032f0baf8 100644 --- a/web-v2/src/pages/console/UsageLogsPage.tsx +++ b/web-v2/src/pages/console/UsageLogsPage.tsx @@ -27,34 +27,6 @@ type LogRow = { type PageInfo = { page: number; page_size: number; total: number; items: T }; -const DEFAULT_COLUMNS = [ - 'created_at', - 'model_name', - 'token_name', - 'quota', - 'prompt_tokens', - 'completion_tokens', - 'use_time', -] as const; - -type ColumnKey = (typeof DEFAULT_COLUMNS)[number] | 'is_stream' | 'ip'; - -function loadColumns(): ColumnKey[] { - try { - const raw = localStorage.getItem('logs-table-columns-user'); - if (!raw) return [...DEFAULT_COLUMNS]; - const parsed = JSON.parse(raw); - if (!Array.isArray(parsed)) return [...DEFAULT_COLUMNS]; - return parsed as ColumnKey[]; - } catch { - return [...DEFAULT_COLUMNS]; - } -} - -function saveColumns(keys: ColumnKey[]) { - localStorage.setItem('logs-table-columns-user', JSON.stringify(keys)); -} - function loadPageSize() { const raw = localStorage.getItem('page-size'); const num = raw ? Number(raw) : 20; @@ -87,7 +59,6 @@ export function UsageLogsPage() { const [end, setEnd] = useState(() => toDateTimeLocalValueFromSeconds(Math.floor(Date.now() / 1000) + 3600)); const [stat, setStat] = useState<{ quota: number; rpm: number; tpm: number } | null>(null); - const [columns, setColumns] = useState(() => loadColumns()); const [detail, setDetail] = useState(null); @@ -137,14 +108,6 @@ export function UsageLogsPage() { // eslint-disable-next-line react-hooks/exhaustive-deps }, [page, pageSize]); - const toggleColumn = (key: ColumnKey, checked: boolean) => { - setColumns((prev) => { - const next = checked ? (prev.includes(key) ? prev : [...prev, key]) : prev.filter((c) => c !== key); - saveColumns(next); - return next; - }); - }; - return (
- -
-
Columns
- {([ - 'created_at', - 'model_name', - 'token_name', - 'quota', - 'prompt_tokens', - 'completion_tokens', - 'use_time', - 'is_stream', - 'ip', - ] as ColumnKey[]).map((c) => ( - - ))} -
@@ -272,30 +211,30 @@ export function UsageLogsPage() { - {columns.includes('created_at') ? : null} - {columns.includes('model_name') ? : null} - {columns.includes('token_name') ? : null} - {columns.includes('quota') ? : null} - {columns.includes('prompt_tokens') ? : null} - {columns.includes('completion_tokens') ? : null} - {columns.includes('use_time') ? : null} - {columns.includes('is_stream') ? : null} - {columns.includes('ip') ? : null} + + + + + + + + + {items.map((row) => ( - {columns.includes('created_at') ? : null} - {columns.includes('model_name') ? : null} - {columns.includes('token_name') ? : null} - {columns.includes('quota') ? : null} - {columns.includes('prompt_tokens') ? : null} - {columns.includes('completion_tokens') ? : null} - {columns.includes('use_time') ? : null} - {columns.includes('is_stream') ? : null} - {columns.includes('ip') ? : null} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ); +}
TimeModelTokenCostPromptCompletionTime(s)StreamIPTimeModelTokenCostPromptCompletionTime(s)StreamIP Actions
{formatUnixSeconds(row.created_at)}{row.model_name}{row.token_name}${(row.quota / quotaPerUnit).toFixed(6)}{row.prompt_tokens}{row.completion_tokens}{row.use_time}{row.is_stream ? 'Yes' : 'No'}{row.ip}{formatUnixSeconds(row.created_at)}{row.model_name}{row.token_name}${(row.quota / quotaPerUnit).toFixed(6)}{row.prompt_tokens}{row.completion_tokens}{row.use_time}{row.is_stream ? 'Yes' : 'No'}{row.ip}
setDetail(row)}> diff --git a/web/src/components/settings/OperationSetting.jsx b/web/src/components/settings/OperationSetting.jsx index 4a77bcf10..16d4ffb17 100644 --- a/web/src/components/settings/OperationSetting.jsx +++ b/web/src/components/settings/OperationSetting.jsx @@ -27,6 +27,7 @@ import SettingsLog from '../../pages/Setting/Operation/SettingsLog'; import SettingsMonitoring from '../../pages/Setting/Operation/SettingsMonitoring'; import SettingsCreditLimit from '../../pages/Setting/Operation/SettingsCreditLimit'; import SettingsCheckin from '../../pages/Setting/Operation/SettingsCheckin'; +import SettingsLLMEndpoints from '../../pages/Setting/Operation/SettingsLLMEndpoints'; import { API, showError, toBoolean } from '../../helpers'; const OperationSetting = () => { @@ -76,6 +77,19 @@ const OperationSetting = () => { 'checkin_setting.enabled': false, 'checkin_setting.min_quota': 1000, 'checkin_setting.max_quota': 10000, + + /* LLM 端点开关 */ + 'llm_endpoint_setting.enable_completions': true, + 'llm_endpoint_setting.enable_chat_completions': true, + 'llm_endpoint_setting.enable_responses': false, + 'llm_endpoint_setting.enable_claude_messages': false, + 'llm_endpoint_setting.enable_embeddings': false, + 'llm_endpoint_setting.enable_images': false, + 'llm_endpoint_setting.enable_audio': false, + 'llm_endpoint_setting.enable_moderations': false, + 'llm_endpoint_setting.enable_rerank': false, + 'llm_endpoint_setting.enable_realtime': false, + 'llm_endpoint_setting.enable_gemini': false, }); let [loading, setLoading] = useState(false); @@ -121,6 +135,10 @@ const OperationSetting = () => { + {/* LLM 端点开关 */} + + + {/* 顶栏模块管理 */}
diff --git a/web/src/components/table/channels/modals/EditChannelModal.jsx b/web/src/components/table/channels/modals/EditChannelModal.jsx index e8c4c5414..c903cd444 100644 --- a/web/src/components/table/channels/modals/EditChannelModal.jsx +++ b/web/src/components/table/channels/modals/EditChannelModal.jsx @@ -151,6 +151,7 @@ const EditChannelModal = (props) => { multi_key_mode: 'random', // 渠道额外设置的默认值 force_format: false, + completions_via_chat_completions: false, thinking_to_content: false, proxy: '', pass_through_body_enabled: false, @@ -370,10 +371,12 @@ const EditChannelModal = (props) => { // 渠道额外设置状态 const [channelSettings, setChannelSettings] = useState({ force_format: false, + completions_via_chat_completions: false, thinking_to_content: false, proxy: '', pass_through_body_enabled: false, system_prompt: '', + system_prompt_override: false, }); const showApiConfigCard = true; // 控制是否显示 API 配置卡片 const getInitValues = () => ({ ...originInputs }); @@ -576,6 +579,8 @@ const EditChannelModal = (props) => { try { const parsedSettings = JSON.parse(data.setting); data.force_format = parsedSettings.force_format || false; + data.completions_via_chat_completions = + parsedSettings.completions_via_chat_completions || false; data.thinking_to_content = parsedSettings.thinking_to_content || false; data.proxy = parsedSettings.proxy || ''; @@ -587,6 +592,7 @@ const EditChannelModal = (props) => { } catch (error) { console.error('Failed to parse channel settings JSON:', error); data.force_format = false; + data.completions_via_chat_completions = false; data.thinking_to_content = false; data.proxy = ''; data.pass_through_body_enabled = false; @@ -595,6 +601,7 @@ const EditChannelModal = (props) => { } } else { data.force_format = false; + data.completions_via_chat_completions = false; data.thinking_to_content = false; data.proxy = ''; data.pass_through_body_enabled = false; @@ -663,6 +670,7 @@ const EditChannelModal = (props) => { // 同步更新channelSettings状态显示 setChannelSettings({ force_format: data.force_format, + completions_via_chat_completions: data.completions_via_chat_completions, thinking_to_content: data.thinking_to_content, proxy: data.proxy, pass_through_body_enabled: data.pass_through_body_enabled, @@ -1296,6 +1304,8 @@ const EditChannelModal = (props) => { // 生成渠道额外设置JSON const channelExtraSettings = { force_format: localInputs.force_format || false, + completions_via_chat_completions: + localInputs.completions_via_chat_completions || false, thinking_to_content: localInputs.thinking_to_content || false, proxy: localInputs.proxy || '', pass_through_body_enabled: localInputs.pass_through_body_enabled || false, @@ -1347,6 +1357,7 @@ const EditChannelModal = (props) => { // 清理不需要发送到后端的字段 delete localInputs.force_format; + delete localInputs.completions_via_chat_completions; delete localInputs.thinking_to_content; delete localInputs.proxy; delete localInputs.pass_through_body_enabled; @@ -3203,6 +3214,24 @@ const EditChannelModal = (props) => { /> )} + {inputs.type === 1 && ( + + handleChannelSettingsChange( + 'completions_via_chat_completions', + value, + ) + } + extraText={t( + '开启后:/v1/completions 将通过 /v1/chat/completions 转发(最后一条消息使用 assistant role),并返回 completions 兼容格式', + )} + /> + )} + . + +For commercial licensing, please contact support@quantumnous.com +*/ + +import React, { useEffect, useRef, useState } from 'react'; +import { Banner, Button, Col, Form, Row, Spin } from '@douyinfe/semi-ui'; +import { useTranslation } from 'react-i18next'; +import { + API, + compareObjects, + showError, + showSuccess, + showWarning, +} from '../../../helpers'; + +const defaultLLMEndpointInputs = { + 'llm_endpoint_setting.enable_completions': true, + 'llm_endpoint_setting.enable_chat_completions': true, + 'llm_endpoint_setting.enable_responses': false, + 'llm_endpoint_setting.enable_claude_messages': false, + 'llm_endpoint_setting.enable_embeddings': false, + 'llm_endpoint_setting.enable_images': false, + 'llm_endpoint_setting.enable_audio': false, + 'llm_endpoint_setting.enable_moderations': false, + 'llm_endpoint_setting.enable_rerank': false, + 'llm_endpoint_setting.enable_realtime': false, + 'llm_endpoint_setting.enable_gemini': false, +}; + +export default function SettingsLLMEndpoints(props) { + const { t } = useTranslation(); + const [loading, setLoading] = useState(false); + const [inputs, setInputs] = useState(defaultLLMEndpointInputs); + const [inputsRow, setInputsRow] = useState(inputs); + const refForm = useRef(); + + function handleFieldChange(fieldName) { + return (value) => { + setInputs((prev) => ({ ...prev, [fieldName]: value })); + }; + } + + function onSubmit() { + const updateArray = compareObjects(inputs, inputsRow); + if (!updateArray.length) return showWarning(t('你似乎并没有修改什么')); + + const requestQueue = updateArray.map((item) => + API.put('/api/option/', { + key: item.key, + value: String(inputs[item.key]), + }), + ); + + setLoading(true); + Promise.all(requestQueue) + .then((res) => { + if (requestQueue.length === 1) { + if (res.includes(undefined)) return; + } else if (requestQueue.length > 1) { + if (res.includes(undefined)) + return showError(t('部分保存失败,请重试')); + } + showSuccess(t('保存成功')); + props.refresh(); + }) + .catch(() => { + showError(t('保存失败,请重试')); + }) + .finally(() => { + setLoading(false); + }); + } + + useEffect(() => { + const currentInputs = {}; + for (const key of Object.keys(defaultLLMEndpointInputs)) { + if (props.options[key] !== undefined) { + currentInputs[key] = props.options[key]; + } + } + + const mergedInputs = { ...defaultLLMEndpointInputs, ...currentInputs }; + setInputs(mergedInputs); + setInputsRow(structuredClone(mergedInputs)); + refForm.current?.setValues(mergedInputs); + }, [props.options]); + + return ( + +
(refForm.current = formAPI)} + style={{ marginBottom: 15 }} + > + + + + +