Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions controller/relay.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,23 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
defer func() {
if newAPIError != nil {
logger.LogError(c, fmt.Sprintf("relay error: %s", newAPIError.Error()))
if shouldReturnOverloadedForUpstreamError(newAPIError) {
switch relayFormat {
case types.RelayFormatOpenAIRealtime:
helper.WssError(c, ws, overloadedOpenAIError())
case types.RelayFormatClaude:
c.JSON(http.StatusServiceUnavailable, gin.H{
"type": "error",
"error": overloadedClaudeError(),
})
default:
c.JSON(http.StatusServiceUnavailable, gin.H{
"error": overloadedOpenAIError(),
})
}
return
}

newAPIError.SetMessage(common.MessageWithRequestId(newAPIError.Error(), requestId))
switch relayFormat {
case types.RelayFormatOpenAIRealtime:
Expand Down
45 changes: 45 additions & 0 deletions controller/upstream_error_mask.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package controller

import (
"net/http"

"github.com/QuantumNous/new-api/types"
)

// overloadedMessage is the generic error text returned to clients in place
// of masked upstream error details.
const overloadedMessage = "Overloaded"

// shouldMaskUpstreamStatusCode reports whether an upstream HTTP status code
// should be hidden from the client and replaced by a generic "Overloaded"
// response.
//
// Every 5xx code is masked. 4xx codes are masked as well, with two
// exceptions that carry actionable information for the caller:
// 400 (Bad Request) and 413 (Request Entity Too Large).
func shouldMaskUpstreamStatusCode(statusCode int) bool {
	switch {
	case statusCode == http.StatusBadRequest,
		statusCode == http.StatusRequestEntityTooLarge:
		return false
	case statusCode >= 400 && statusCode < 600:
		return true
	default:
		return false
	}
}

// shouldReturnOverloadedForUpstreamError reports whether err originated from
// the upstream provider and carries a status code that must be masked, in
// which case the caller should reply with the generic "Overloaded" error
// instead of leaking upstream details.
func shouldReturnOverloadedForUpstreamError(err *types.NewAPIError) bool {
	if err == nil || !err.IsUpstreamError() {
		return false
	}
	return shouldMaskUpstreamStatusCode(err.UpstreamStatusCode())
}

// overloadedOpenAIError builds the generic OpenAI-format error body returned
// in place of a masked upstream error. Param and Code are intentionally left
// at their zero values.
func overloadedOpenAIError() types.OpenAIError {
	return types.OpenAIError{
		Type:    "upstream_error",
		Message: overloadedMessage,
	}
}

// overloadedClaudeError builds the generic Claude-format error body returned
// in place of a masked upstream error.
func overloadedClaudeError() types.ClaudeError {
	return types.ClaudeError{
		Message: overloadedMessage,
		Type:    "upstream_error",
	}
}
15 changes: 9 additions & 6 deletions dto/channel_settings.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
package dto

// ChannelSettings carries per-channel behavior toggles, serialized as JSON
// on the channel record. All boolean options default to false; Proxy is
// always emitted (no omitempty) so an explicitly cleared proxy round-trips.
type ChannelSettings struct {
	ForceFormat bool `json:"force_format,omitempty"`
	// If enabled, /v1/completions requests will be proxied via /v1/chat/completions
	// and responses will be converted back to the legacy Completions-compatible format.
	CompletionsViaChatCompletions bool   `json:"completions_via_chat_completions,omitempty"`
	ThinkingToContent             bool   `json:"thinking_to_content,omitempty"`
	Proxy                         string `json:"proxy"`
	PassThroughBodyEnabled        bool   `json:"pass_through_body_enabled,omitempty"`
	SystemPrompt                  string `json:"system_prompt,omitempty"`
	SystemPromptOverride          bool   `json:"system_prompt_override,omitempty"`
}

type VertexKeyType string
Expand Down
74 changes: 74 additions & 0 deletions middleware/llm_endpoint_guard.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package middleware

import (
"fmt"
"net/http"

"github.com/QuantumNous/new-api/setting/operation_setting"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
)

// LLMEndpointGuard blocks globally disabled LLM-related endpoints.
//
// It returns 404 to make disabled endpoints behave like they do not exist.
// LLMEndpointGuard blocks globally disabled LLM-related endpoints.
//
// It returns 404 to make disabled endpoints behave like they do not exist.
func LLMEndpointGuard() gin.HandlerFunc {
	return func(c *gin.Context) {
		if enabled, managed := isCurrentLLMEndpointEnabled(c); managed && !enabled {
			abortAsRelayNotFound(c)
			return
		}
		c.Next()
	}
}

// isCurrentLLMEndpointEnabled reports whether the route matched by this
// request is one of the globally toggleable LLM endpoints and, if so,
// whether that endpoint is currently enabled.
//
// The second result is false for routes the guard does not manage —
// including requests that matched no route at all — and such requests are
// always allowed through.
func isCurrentLLMEndpointEnabled(c *gin.Context) (bool, bool) {
	route := c.FullPath()
	if route == "" {
		return true, false
	}

	cfg := operation_setting.GetLLMEndpointSetting()

	switch route {
	case "/v1/completions":
		return cfg.EnableCompletions, true
	case "/v1/chat/completions", "/pg/chat/completions":
		return cfg.EnableChatCompletions, true
	case "/v1/responses":
		return cfg.EnableResponses, true
	case "/v1/messages":
		return cfg.EnableClaudeMessages, true
	case "/v1/embeddings":
		return cfg.EnableEmbeddings, true
	case "/v1/edits", "/v1/images/generations", "/v1/images/edits", "/v1/images/variations":
		return cfg.EnableImages, true
	case "/v1/audio/transcriptions", "/v1/audio/translations", "/v1/audio/speech":
		return cfg.EnableAudio, true
	case "/v1/moderations":
		return cfg.EnableModerations, true
	case "/v1/rerank":
		return cfg.EnableRerank, true
	case "/v1/realtime":
		return cfg.EnableRealtime, true
	case "/v1/engines/:model/embeddings", "/v1/models/*path", "/v1beta/models", "/v1beta/openai/models", "/v1beta/models/*path":
		return cfg.EnableGemini, true
	default:
		return true, false
	}
}

// abortAsRelayNotFound ends the request with an OpenAI-style 404 body, so a
// disabled endpoint is indistinguishable from one that was never routed.
func abortAsRelayNotFound(c *gin.Context) {
	c.JSON(http.StatusNotFound, gin.H{
		"error": types.OpenAIError{
			Message: fmt.Sprintf("Invalid URL (%s %s)", c.Request.Method, c.Request.URL.Path),
			Type:    "invalid_request_error",
			Param:   "",
			Code:    "",
		},
	})
	c.Abort()
}

51 changes: 51 additions & 0 deletions middleware/llm_endpoint_guard_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package middleware

import (
"net/http"
"net/http/httptest"
"testing"

"github.com/gin-gonic/gin"
)

// TestLLMEndpointGuard_Defaults verifies the guard's default toggles:
// completions and chat completions pass through (and hit the auth stand-in),
// while everything else is hidden behind a 404 before auth runs.
func TestLLMEndpointGuard_Defaults(t *testing.T) {
	gin.SetMode(gin.TestMode)

	// Auth stand-in: reaching it proves the guard let the request through.
	reject := func(c *gin.Context) {
		c.Status(http.StatusUnauthorized)
		c.Abort()
	}
	ok := func(c *gin.Context) { c.Status(http.StatusOK) }

	router := gin.New()

	v1 := router.Group("/v1")
	v1.Use(LLMEndpointGuard())
	v1.Use(reject)
	v1.POST("/completions", ok)
	v1.POST("/chat/completions", ok)
	v1.POST("/embeddings", ok)
	v1.GET("/realtime", ok)

	v1betaModels := router.Group("/v1beta/models")
	v1betaModels.Use(LLMEndpointGuard())
	v1betaModels.Use(reject)
	v1betaModels.GET("", ok)

	cases := []struct {
		method string
		path   string
		want   int
	}{
		// Defaults: allow completions and chat completions.
		{http.MethodPost, "/v1/completions", http.StatusUnauthorized},
		{http.MethodPost, "/v1/chat/completions", http.StatusUnauthorized},
		// Defaults: disable everything else.
		{http.MethodPost, "/v1/embeddings", http.StatusNotFound},
		{http.MethodGet, "/v1/realtime", http.StatusNotFound},
		{http.MethodGet, "/v1beta/models", http.StatusNotFound},
	}

	for _, tc := range cases {
		recorder := httptest.NewRecorder()
		router.ServeHTTP(recorder, httptest.NewRequest(tc.method, tc.path, nil))
		if recorder.Code != tc.want {
			t.Fatalf("%s %s: expected %d, got %d", tc.method, tc.path, tc.want, recorder.Code)
		}
	}
}

2 changes: 1 addition & 1 deletion relay/channel/ali/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ func aliImageHandler(a *Adaptor, c *gin.Context, resp *http.Response, info *rela
Type: "ali_error",
Param: "",
Code: aliResponse.Output.Code,
}, resp.StatusCode), nil
}, resp.StatusCode, types.ErrOptionWithUpstreamError()), nil
}
}

Expand Down
2 changes: 1 addition & 1 deletion relay/channel/ali/rerank.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func RerankHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayI
Type: aliResponse.Code,
Param: aliResponse.RequestId,
Code: aliResponse.Code,
}, resp.StatusCode), nil
}, resp.StatusCode, types.ErrOptionWithUpstreamError()), nil
}

usage := dto.Usage{
Expand Down
4 changes: 2 additions & 2 deletions relay/channel/claude/relay-claude.go
Original file line number Diff line number Diff line change
Expand Up @@ -642,7 +642,7 @@ func HandleStreamResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
return types.NewError(err, types.ErrorCodeBadResponseBody)
}
if claudeError := claudeResponse.GetClaudeError(); claudeError != nil && claudeError.Type != "" {
return types.WithClaudeError(*claudeError, http.StatusInternalServerError)
return types.WithClaudeError(*claudeError, http.StatusInternalServerError, types.ErrOptionWithUpstreamError())
}
if info.RelayFormat == types.RelayFormatClaude {
FormatClaudeResponseInfo(requestMode, &claudeResponse, nil, claudeInfo)
Expand Down Expand Up @@ -739,7 +739,7 @@ func HandleClaudeResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
return types.NewError(err, types.ErrorCodeBadResponseBody)
}
if claudeError := claudeResponse.GetClaudeError(); claudeError != nil && claudeError.Type != "" {
return types.WithClaudeError(*claudeError, http.StatusInternalServerError)
return types.WithClaudeError(*claudeError, http.StatusInternalServerError, types.ErrOptionWithUpstreamError())
}
if requestMode == RequestModeCompletion {
claudeInfo.Usage = service.ResponseText2Usage(c, claudeResponse.Completion, info.UpstreamModelName, info.GetEstimatePromptTokens())
Expand Down
2 changes: 1 addition & 1 deletion relay/channel/jimeng/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func jimengImageHandler(c *gin.Context, resp *http.Response, info *relaycommon.R
Type: "jimeng_error",
Param: "",
Code: fmt.Sprintf("%d", jimengResponse.Code),
}, resp.StatusCode)
}, resp.StatusCode, types.ErrOptionWithUpstreamError())
}

// Convert Jimeng response to OpenAI format
Expand Down
36 changes: 33 additions & 3 deletions relay/channel/openai/adaptor.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,13 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
url = strings.Replace(url, "{model}", info.UpstreamModelName, -1)
return url, nil
default:
if info.ChannelType == constant.ChannelTypeOpenAI &&
info.RelayMode == relayconstant.RelayModeCompletions &&
info.ChannelSetting.CompletionsViaChatCompletions &&
!model_setting.GetGlobalSettings().PassThroughRequestEnabled &&
!info.ChannelSetting.PassThroughBodyEnabled {
return relaycommon.GetFullRequestURL(info.ChannelBaseUrl, "/v1/chat/completions", info.ChannelType), nil
}
if info.RelayFormat == types.RelayFormatClaude || info.RelayFormat == types.RelayFormatGemini {
return fmt.Sprintf("%s/v1/chat/completions", info.ChannelBaseUrl), nil
}
Expand Down Expand Up @@ -217,6 +224,21 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
if request == nil {
return nil, errors.New("request is nil")
}
if info.ChannelType == constant.ChannelTypeOpenAI &&
info.RelayMode == relayconstant.RelayModeCompletions &&
info.ChannelSetting.CompletionsViaChatCompletions {
promptText := completionsPromptToString(request.Prompt)
request.Messages = []dto.Message{
{
Role: "assistant",
Content: promptText,
},
}
// Remove legacy fields to keep the upstream payload chat-completions compatible.
request.Prompt = nil
request.Prefix = nil
request.Suffix = nil
}
if info.ChannelType != constant.ChannelTypeOpenAI && info.ChannelType != constant.ChannelTypeAzure {
request.StreamOptions = nil
}
Expand Down Expand Up @@ -606,10 +628,18 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
usage, err = OaiResponsesHandler(c, info, resp)
}
default:
if info.IsStream {
usage, err = OaiStreamHandler(c, info, resp)
if shouldUseChatCompletionsForCompletions(info) {
if info.IsStream {
usage, err = OaiCompletionsViaChatStreamHandler(c, info, resp)
} else {
usage, err = OpenaiCompletionsViaChatHandler(c, info, resp)
}
} else {
usage, err = OpenaiHandler(c, info, resp)
if info.IsStream {
usage, err = OaiStreamHandler(c, info, resp)
} else {
usage, err = OpenaiHandler(c, info, resp)
}
}
}
return
Expand Down
4 changes: 2 additions & 2 deletions relay/channel/openai/chat_via_responses.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func OaiResponsesToChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp
}

if oaiError := responsesResp.GetOpenAIError(); oaiError != nil && oaiError.Type != "" {
return nil, types.WithOpenAIError(*oaiError, resp.StatusCode)
return nil, types.WithOpenAIError(*oaiError, resp.StatusCode, types.ErrOptionWithUpstreamError())
}

responsesResp.Model = relaycommon.MaskMappedModelName(c, info, responsesResp.Model)
Expand Down Expand Up @@ -327,7 +327,7 @@ func OaiResponsesToChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo
case "response.error", "response.failed":
if streamResp.Response != nil {
if oaiErr := streamResp.Response.GetOpenAIError(); oaiErr != nil && oaiErr.Type != "" {
streamErr = types.WithOpenAIError(*oaiErr, http.StatusInternalServerError)
streamErr = types.WithOpenAIError(*oaiErr, http.StatusInternalServerError, types.ErrOptionWithUpstreamError())
return false
}
}
Expand Down
Loading