diff --git a/fern/docs.yml b/fern/docs.yml index ab5e2e39..22f29ce1 100644 --- a/fern/docs.yml +++ b/fern/docs.yml @@ -290,6 +290,9 @@ navigation: - page: Tool Calling path: pages/07-llm-gateway/tool-calling.mdx slug: /tool-calling + - page: Structured Outputs + path: pages/07-llm-gateway/structured-outputs.mdx + slug: /structured-outputs - page: Migration from LeMUR path: pages/07-llm-gateway/migration-from-lemur.mdx slug: /migration-from-lemur diff --git a/fern/pages/07-llm-gateway/structured-outputs.mdx b/fern/pages/07-llm-gateway/structured-outputs.mdx new file mode 100644 index 00000000..b0273b1f --- /dev/null +++ b/fern/pages/07-llm-gateway/structured-outputs.mdx @@ -0,0 +1,303 @@ +--- +title: "Structured Outputs" +description: "Constrain model responses to follow a specific JSON schema for reliable, parseable outputs" +hidden: true +--- + +## Overview + +Structured outputs allow you to constrain the model's response to follow a specific JSON schema. This ensures the model returns data in a predictable format that can be reliably parsed and processed by your application. + + +Structured outputs are supported by OpenAI (GPT-4.1, GPT-5.x) and Gemini models. Claude and gpt-oss models do not currently support structured outputs. + + +## Getting started + +To use structured outputs, include the `response_format` parameter in your request with a `json_schema` type: + + + + +```python +import requests + +headers = { + "authorization": "", + "content-type": "application/json" +} + +response = requests.post( + "https://llm-gateway.assemblyai.com/v1/chat/completions", + headers=headers, + json={ + "model": "gemini-2.5-flash-lite", + "messages": [ + { + "role": "system", + "content": "You are a helpful math tutor. Guide the user through the solution step by step." 
+ }, + { + "role": "user", + "content": "how can I solve 8x + 7 = -23" + } + ], + "response_format": { + "type": "json_schema", + "json_schema": { + "name": "math_reasoning", + "schema": { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": { + "explanation": {"type": "string"}, + "output": {"type": "string"} + }, + "required": ["explanation", "output"], + "additionalProperties": False + } + }, + "final_answer": {"type": "string"} + }, + "required": ["steps", "final_answer"], + "additionalProperties": False + }, + "strict": True + } + } + } +) + +result = response.json() +print(result["choices"][0]["message"]["content"]) +``` + + + + +```javascript +const response = await fetch( + "https://llm-gateway.assemblyai.com/v1/chat/completions", + { + method: "POST", + headers: { + authorization: "", + "content-type": "application/json", + }, + body: JSON.stringify({ + model: "gemini-2.5-flash-lite", + messages: [ + { + role: "system", + content: + "You are a helpful math tutor. 
Guide the user through the solution step by step.", + }, + { + role: "user", + content: "how can I solve 8x + 7 = -23", + }, + ], + response_format: { + type: "json_schema", + json_schema: { + name: "math_reasoning", + schema: { + type: "object", + properties: { + steps: { + type: "array", + items: { + type: "object", + properties: { + explanation: { type: "string" }, + output: { type: "string" }, + }, + required: ["explanation", "output"], + additionalProperties: false, + }, + }, + final_answer: { type: "string" }, + }, + required: ["steps", "final_answer"], + additionalProperties: false, + }, + strict: true, + }, + }, + }), + } +); + +const result = await response.json(); +console.log(result.choices[0].message.content); +``` + + + + +```bash +curl -X POST "https://llm-gateway.assemblyai.com/v1/chat/completions" \ + -H "Content-Type: application/json" \ + -H "Authorization: " \ + -d '{ + "model": "gemini-2.5-flash-lite", + "messages": [ + { + "role": "system", + "content": "You are a helpful math tutor. Guide the user through the solution step by step." 
+ }, + { + "role": "user", + "content": "how can I solve 8x + 7 = -23" + } + ], + "response_format": { + "type": "json_schema", + "json_schema": { + "name": "math_reasoning", + "schema": { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": { + "explanation": { "type": "string" }, + "output": { "type": "string" } + }, + "required": ["explanation", "output"], + "additionalProperties": false + } + }, + "final_answer": { "type": "string" } + }, + "required": ["steps", "final_answer"], + "additionalProperties": false + }, + "strict": true + } + } + }' +``` + + + + +## Example response + +When using structured outputs, the `content` field of the assistant message is a JSON string that conforms to your schema: + +```json +{ + "request_id": "abc123", + "choices": [ + { + "message": { + "role": "assistant", + "content": "{\"steps\":[{\"explanation\":\"Start with the equation 8x + 7 = -23\",\"output\":\"8x + 7 = -23\"},{\"explanation\":\"Subtract 7 from both sides to isolate the term with x\",\"output\":\"8x = -30\"},{\"explanation\":\"Divide both sides by 8 to solve for x\",\"output\":\"x = -30/8 = -15/4 = -3.75\"}],\"final_answer\":\"x = -3.75\"}" + }, + "finish_reason": "stop" + } + ], + "usage": { + "input_tokens": 85, + "output_tokens": 120, + "total_tokens": 205 + } +} +``` + +You can parse the content as JSON in your application: + +```python +import json + +content = result["choices"][0]["message"]["content"] +parsed = json.loads(content) + +for step in parsed["steps"]: + print(f"{step['explanation']}: {step['output']}") + +print(f"Final answer: {parsed['final_answer']}") +``` + +## Supported models + +Structured outputs are supported by the following model families: + +| Provider | Supported | +| -------- | --------- | +| OpenAI (GPT-4.1, GPT-5.x) | Yes | +| Gemini | Yes | +| gpt-oss | No | +| Claude | No | + +## API reference + +### Request parameters + +The `response_format` parameter controls how the model formats 
its response: + +| Key | Type | Required? | Description | +| -------------------------- | ------- | --------- | ----------------------------------------------------------------------------------------------- | +| `response_format` | object | No | Specifies the format of the model's response. | +| `response_format.type` | string | Yes | The type of response format. Use `"json_schema"` for structured outputs. | +| `response_format.json_schema` | object | Yes | The JSON schema configuration object. | + +### JSON schema object + +| Key | Type | Required? | Description | +| -------------------------------- | ------- | --------- | ----------------------------------------------------------------------------------------------- | +| `json_schema.name` | string | Yes | A name for the schema. Used for identification purposes. | +| `json_schema.schema` | object | Yes | A valid JSON Schema object that defines the structure of the expected response. | +| `json_schema.strict` | boolean | No | When `true`, the model will strictly adhere to the schema. Defaults to `false`. Recommended for reliable parsing. | + +### Schema definition + +The `schema` object follows the [JSON Schema](https://json-schema.org/) specification. Common properties include: + +| Property | Type | Description | +| ---------------------- | ------------- | --------------------------------------------------------------------------- | +| `type` | string | The data type, such as `"object"`, `"array"`, `"string"`, `"number"`, `"integer"`, `"boolean"`, or `"null"`. | +| `properties` | object | For objects, defines the properties and their schemas. | +| `items` | object | For arrays, defines the schema for array items. | +| `required` | array | List of required property names. | +| `additionalProperties` | boolean | When `false`, prevents additional properties not defined in the schema. | + +## Best practices + +When using structured outputs, keep these recommendations in mind: + +Set `strict: true` to ensure the model's response strictly adheres to your schema. 
This is especially important when your application depends on specific fields being present. + +Use `additionalProperties: false` at each level of your schema to prevent the model from adding unexpected fields to the response. + +Keep your schemas focused and specific. Complex schemas with many nested levels may increase latency and token usage. + +Include clear descriptions in your system or user messages to help the model understand what data to extract or generate for each field. + +## Error handling + +If the model cannot generate a valid response that matches your schema, you may receive an error or a response that doesn't fully conform to the schema. Always validate the parsed JSON against your expected structure: + +```python +import json + +try: + content = result["choices"][0]["message"]["content"] + parsed = json.loads(content) + + # Validate required fields exist + if "steps" not in parsed or "final_answer" not in parsed: + raise ValueError("Missing required fields in response") + +except json.JSONDecodeError as e: + print(f"Failed to parse response as JSON: {e}") +except KeyError as e: + print(f"Unexpected response structure: {e}") +``` diff --git a/llm-gateway.yml b/llm-gateway.yml index ac23960b..f8aa3c2a 100644 --- a/llm-gateway.yml +++ b/llm-gateway.yml @@ -43,6 +43,38 @@ paths: prompt: "Write a haiku about coding" max_tokens: 50 temperature: 0.5 + structured_output_example: + summary: Structured output with JSON schema + value: + model: "gemini-2.5-flash-lite" + messages: + - role: "system" + content: "You are a helpful math tutor. Guide the user through the solution step by step." 
+ - role: "user" + content: "how can I solve 8x + 7 = -23" + response_format: + type: "json_schema" + json_schema: + name: "math_reasoning" + schema: + type: "object" + properties: + steps: + type: "array" + items: + type: "object" + properties: + explanation: + type: "string" + output: + type: "string" + required: ["explanation", "output"] + additionalProperties: false + final_answer: + type: "string" + required: ["steps", "final_answer"] + additionalProperties: false + strict: true responses: "200": description: Successful response containing the model's choices. @@ -214,6 +246,9 @@ components: description: A list of tools the model may call. tool_choice: $ref: "#/components/schemas/ToolChoice" + response_format: + $ref: "#/components/schemas/ResponseFormat" + description: Specifies the format of the model's response. Use this to constrain the model to output valid JSON matching a schema. Supported by OpenAI (GPT-4.1, GPT-5.x) and Gemini models only. required: - model @@ -332,6 +367,40 @@ components: - function description: Controls which (if any) function is called by the model. + ResponseFormat: + type: object + description: Specifies the format of the model's response. Use `json_schema` type to constrain the model to output valid JSON matching a schema. + properties: + type: + type: string + enum: [json_schema] + description: The type of response format. Use `json_schema` for structured outputs. + json_schema: + $ref: "#/components/schemas/JsonSchemaConfig" + description: The JSON schema configuration object. + required: + - type + - json_schema + + JsonSchemaConfig: + type: object + description: Configuration for JSON schema-based structured outputs. + properties: + name: + type: string + description: A name for the schema. Used for identification purposes. + schema: + type: object + description: A valid JSON Schema object that defines the structure of the expected response. 
+ additionalProperties: true + strict: + type: boolean + description: When `true`, the model will strictly adhere to the schema. Recommended for reliable parsing. + default: false + required: + - name + - schema + # Response Schemas Response: type: object