Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions client-sdks/stainless/openapi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8476,6 +8476,13 @@ components:
sequence_number:
title: Sequence Number
type: integer
logprobs:
anyOf:
- items:
$ref: '#/components/schemas/OpenAITokenLogProb'
type: array
- type: 'null'
nullable: true
type:
const: response.output_text.done
default: response.output_text.done
Expand Down
7 changes: 7 additions & 0 deletions docs/static/deprecated-llama-stack-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5177,6 +5177,13 @@ components:
sequence_number:
title: Sequence Number
type: integer
logprobs:
anyOf:
- items:
$ref: '#/components/schemas/OpenAITokenLogProb'
type: array
- type: 'null'
nullable: true
type:
const: response.output_text.done
default: response.output_text.done
Expand Down
7 changes: 7 additions & 0 deletions docs/static/experimental-llama-stack-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5358,6 +5358,13 @@ components:
sequence_number:
title: Sequence Number
type: integer
logprobs:
anyOf:
- items:
$ref: '#/components/schemas/OpenAITokenLogProb'
type: array
- type: 'null'
nullable: true
type:
const: response.output_text.done
default: response.output_text.done
Expand Down
7 changes: 7 additions & 0 deletions docs/static/llama-stack-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7361,6 +7361,13 @@ components:
sequence_number:
title: Sequence Number
type: integer
logprobs:
anyOf:
- items:
$ref: '#/components/schemas/OpenAITokenLogProb'
type: array
- type: 'null'
nullable: true
type:
const: response.output_text.done
default: response.output_text.done
Expand Down
7 changes: 7 additions & 0 deletions docs/static/stainless-llama-stack-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8476,6 +8476,13 @@ components:
sequence_number:
title: Sequence Number
type: integer
logprobs:
anyOf:
- items:
$ref: '#/components/schemas/OpenAITokenLogProb'
type: array
- type: 'null'
nullable: true
type:
const: response.output_text.done
default: response.output_text.done
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
OpenAIResponseObjectStreamResponseOutputItemAdded,
OpenAIResponseObjectStreamResponseOutputItemDone,
OpenAIResponseObjectStreamResponseOutputTextDelta,
OpenAIResponseObjectStreamResponseOutputTextDone,
OpenAIResponseObjectStreamResponseReasoningTextDelta,
OpenAIResponseObjectStreamResponseReasoningTextDone,
OpenAIResponseObjectStreamResponseRefusalDelta,
Expand Down Expand Up @@ -1145,9 +1146,19 @@ async def _process_streaming_chunks(
sequence_number=self.sequence_number,
)

# Emit content_part.done event if text content was streamed (before content gets cleared)
# Emit output_text.done and content_part.done events if text content was streamed
if content_part_emitted:
final_text = "".join(chat_response_content)
# Emit output_text.done with the final accumulated text (per OpenAI protocol)
self.sequence_number += 1
yield OpenAIResponseObjectStreamResponseOutputTextDone(
content_index=content_index,
text=final_text,
item_id=message_item_id,
output_index=message_output_index,
sequence_number=self.sequence_number,
logprobs=chat_response_logprobs if chat_response_logprobs else [],
)
self.sequence_number += 1
yield OpenAIResponseObjectStreamResponseContentPartDone(
content_index=content_index,
Expand Down
2 changes: 2 additions & 0 deletions src/llama_stack_api/openai_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -928,6 +928,7 @@ class OpenAIResponseObjectStreamResponseOutputTextDone(BaseModel):
:param item_id: Unique identifier of the completed output item
:param output_index: Index position of the item in the output list
:param sequence_number: Sequential number for ordering streaming events
:param logprobs: Token log probability details for the completed text
:param type: Event type identifier, always "response.output_text.done"
"""

Expand All @@ -936,6 +937,7 @@ class OpenAIResponseObjectStreamResponseOutputTextDone(BaseModel):
item_id: str
output_index: int
sequence_number: int
logprobs: list[OpenAITokenLogProb] | None = None
type: Literal["response.output_text.done"] = "response.output_text.done"


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,8 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
)

# Should have content part events for text streaming
# Expected: response.created, response.in_progress, content_part.added, output_text.delta, content_part.done, response.completed
# Expected: response.created, response.in_progress, output_item.added, content_part.added,
# output_text.delta, output_text.done, content_part.done, output_item.done, response.completed
assert len(chunks) >= 5
assert chunks[0].type == "response.created"
assert any(chunk.type == "response.in_progress" for chunk in chunks)
Expand All @@ -250,10 +251,12 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
content_part_added_events = [c for c in chunks if c.type == "response.content_part.added"]
content_part_done_events = [c for c in chunks if c.type == "response.content_part.done"]
text_delta_events = [c for c in chunks if c.type == "response.output_text.delta"]
text_done_events = [c for c in chunks if c.type == "response.output_text.done"]

assert len(content_part_added_events) >= 1, "Should have content_part.added event for text"
assert len(content_part_done_events) >= 1, "Should have content_part.done event for text"
assert len(text_delta_events) >= 1, "Should have text delta events"
assert len(text_done_events) >= 1, "Should have output_text.done event with final accumulated text"

added_event = content_part_added_events[0]
done_event = content_part_done_events[0]
Expand All @@ -263,6 +266,20 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
assert added_event.item_id == done_event.item_id
assert added_event.response_id == done_event.response_id

# Verify output_text.done contains the final accumulated text and correct indices
text_done_event = text_done_events[0]
assert text_done_event.content_index == 0
assert text_done_event.output_index == 0
assert text_done_event.item_id == added_event.item_id
assert isinstance(text_done_event.text, str)
assert len(text_done_event.text) > 0, "output_text.done should contain the final text"

# Verify output_text.done comes before content_part.done (per OpenAI protocol)
chunk_types = [c.type for c in chunks]
text_done_idx = chunk_types.index("response.output_text.done")
content_done_idx = chunk_types.index("response.content_part.done")
assert text_done_idx < content_done_idx, "output_text.done must precede content_part.done"

# Verify final event is completion
assert chunks[-1].type == "response.completed"

Expand Down
Loading