diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml
index a66d107695..71dc8ef32b 100644
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -4129,9 +4129,12 @@ components:
           description: The logit bias to use.
         logprobs:
           anyOf:
-          - type: boolean
+          - type: integer
+            maximum: 5.0
+            minimum: 0.0
           - type: 'null'
-          description: The log probabilities to use.
+          description: Include the log probabilities on the logprobs most likely output
+            tokens.
         max_tokens:
           anyOf:
           - type: integer
diff --git a/src/llama_stack_api/inference/models.py b/src/llama_stack_api/inference/models.py
index 616cf7c4dc..ea4e021b20 100644
--- a/src/llama_stack_api/inference/models.py
+++ b/src/llama_stack_api/inference/models.py
@@ -891,7 +891,7 @@ class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"):
         default=None, ge=-2.0, le=2.0, description="The penalty for repeated tokens."
     )
     logit_bias: dict[str, float] | None = Field(default=None, description="The logit bias to use.")
-    logprobs: bool | None = Field(default=None, description="The log probabilities to use.")
+    logprobs: int | None = Field(default=None, ge=0, le=5, description="Include the log probabilities on the logprobs most likely output tokens.")
     max_tokens: int | None = Field(default=None, ge=1, description="The maximum number of tokens to generate.")
     n: int | None = Field(default=None, ge=1, description="The number of completions to generate.")
     presence_penalty: float | None = Field(