From 3f65c716338b3fadde761dd888e3cc6be29801bf Mon Sep 17 00:00:00 2001 From: Extra Small Date: Fri, 27 Mar 2026 12:30:58 -0700 Subject: [PATCH] fix: change logprobs type from bool to int in Completions endpoint The OpenAI Completions API expects logprobs as an integer (0-5) specifying how many top log probabilities to return per token. The Chat Completions endpoint correctly uses bool, but the legacy Completions model had the wrong type, causing 400 errors when passing an integer value. Fixes #5253 --- docs/static/llama-stack-spec.yaml | 7 +++++-- src/llama_stack_api/inference/models.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index a66d107695..71dc8ef32b 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -4129,9 +4129,12 @@ components: description: The logit bias to use. logprobs: anyOf: - - type: boolean + - type: integer + maximum: 5.0 + minimum: 0.0 - type: 'null' - description: The log probabilities to use. + description: Include the log probabilities on the logprobs most likely output + tokens. max_tokens: anyOf: - type: integer diff --git a/src/llama_stack_api/inference/models.py b/src/llama_stack_api/inference/models.py index 616cf7c4dc..ea4e021b20 100644 --- a/src/llama_stack_api/inference/models.py +++ b/src/llama_stack_api/inference/models.py @@ -891,7 +891,7 @@ class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"): default=None, ge=-2.0, le=2.0, description="The penalty for repeated tokens." ) logit_bias: dict[str, float] | None = Field(default=None, description="The logit bias to use.") - logprobs: bool | None = Field(default=None, description="The log probabilities to use.") + logprobs: int | None = Field(default=None, ge=0, le=5, description="Include the log probabilities on the logprobs most likely output tokens.") max_tokens: int | None = Field(default=None, ge=1, description="The maximum number of tokens to generate.") n: int | None = Field(default=None, ge=1, description="The number of completions to generate.") presence_penalty: float | None = Field(