From 3f65c716338b3fadde761dd888e3cc6be29801bf Mon Sep 17 00:00:00 2001
From: Extra Small <littleshuai.bot@gmail.com>
Date: Fri, 27 Mar 2026 12:30:58 -0700
Subject: [PATCH] fix: change logprobs type from bool to int in Completions
 endpoint

The OpenAI Completions API expects `logprobs` to be an integer (0-5)
specifying how many top log probabilities to return per token.
The Chat Completions endpoint correctly uses bool, but the legacy
Completions request model was also typed as bool, causing 400 errors
when an integer value was passed.

Fixes #5253
---
 docs/static/llama-stack-spec.yaml       | 7 +++++--
 src/llama_stack_api/inference/models.py | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml
index a66d107695..71dc8ef32b 100644
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -4129,9 +4129,12 @@ components:
           description: The logit bias to use.
         logprobs:
           anyOf:
-          - type: boolean
+          - type: integer
+            maximum: 5.0
+            minimum: 0.0
           - type: 'null'
-          description: The log probabilities to use.
+          description: Include the log probabilities on the logprobs most likely output
+            tokens.
         max_tokens:
           anyOf:
           - type: integer
diff --git a/src/llama_stack_api/inference/models.py b/src/llama_stack_api/inference/models.py
index 616cf7c4dc..ea4e021b20 100644
--- a/src/llama_stack_api/inference/models.py
+++ b/src/llama_stack_api/inference/models.py
@@ -891,7 +891,7 @@ class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"):
         default=None, ge=-2.0, le=2.0, description="The penalty for repeated tokens."
     )
     logit_bias: dict[str, float] | None = Field(default=None, description="The logit bias to use.")
-    logprobs: bool | None = Field(default=None, description="The log probabilities to use.")
+    logprobs: int | None = Field(default=None, ge=0, le=5, description="Include the log probabilities on the logprobs most likely output tokens.")
     max_tokens: int | None = Field(default=None, ge=1, description="The maximum number of tokens to generate.")
     n: int | None = Field(default=None, ge=1, description="The number of completions to generate.")
     presence_penalty: float | None = Field(