diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index a66d107695..71dc8ef32b 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -4129,9 +4129,12 @@ components: description: The logit bias to use. logprobs: anyOf: - - type: boolean + - type: integer + maximum: 5.0 + minimum: 0.0 - type: 'null' - description: The log probabilities to use. + description: Include the log probabilities on the logprobs most likely output + tokens. max_tokens: anyOf: - type: integer diff --git a/src/llama_stack_api/inference/models.py b/src/llama_stack_api/inference/models.py index 616cf7c4dc..ea4e021b20 100644 --- a/src/llama_stack_api/inference/models.py +++ b/src/llama_stack_api/inference/models.py @@ -891,7 +891,7 @@ class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"): default=None, ge=-2.0, le=2.0, description="The penalty for repeated tokens." ) logit_bias: dict[str, float] | None = Field(default=None, description="The logit bias to use.") - logprobs: bool | None = Field(default=None, description="The log probabilities to use.") + logprobs: int | None = Field(default=None, ge=0, le=5, description="Include the log probabilities on the logprobs most likely output tokens.") max_tokens: int | None = Field(default=None, ge=1, description="The maximum number of tokens to generate.") n: int | None = Field(default=None, ge=1, description="The number of completions to generate.") presence_penalty: float | None = Field(