From 4fce2b14d3cbefdc10d901e8df7a5ebf752ab41b Mon Sep 17 00:00:00 2001
From: lgavincrl <lucy.gavin@speechmatics.com>
Date: Thu, 18 Dec 2025 15:49:55 +0000
Subject: [PATCH 1/6] Update Voice SDK overview documentation with quickstart
 example and restructured configuration sections

- Adjust quickstart example
- Restructure configuration elements with separate tabs for custom configs, overlays, and serialization
- Add preset descriptions
- Update page title from "Voice agents overview" to "Voice SDK overview"
- Clarify SDK description to mention Python explicitly
- Improve API key setup
---
 docs/voice-agents/overview.mdx | 122 +++++++++++++++++++++++++++------
 1 file changed, 100 insertions(+), 22 deletions(-)

diff --git a/docs/voice-agents/overview.mdx b/docs/voice-agents/overview.mdx
index 4f7dced0..9f989f2e 100644
--- a/docs/voice-agents/overview.mdx
+++ b/docs/voice-agents/overview.mdx
@@ -1,17 +1,17 @@
 ---
-description: Learn how to build voice-enabled applications with the Speechmatics Voice SDK
+description: Learn how to build voice-enabled applications with the Speechmatics Voice SDK 
 ---
 import Admonition from '@theme/Admonition';
 import CodeBlock from '@theme/CodeBlock';
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-import pythonVoiceQuickstart from "./assets/basic-quickstart.py?raw"
-import pythonVoicePresets from "./assets/presets.py?raw"
 import pythonVoiceCustomConfig from "./assets/custom-config.py?raw"
+import pythonVoiceConfigOverlays from "./assets/config-overlays.py?raw"
+import pythonVoiceConfigSerialization from "./assets/config-serialization.py?raw"
 
-# Voice agents overview
-The Voice SDK builds on our Realtime API to provide features optimized for conversational AI:
+# Voice SDK overview
+The Voice SDK is a Python library that builds on our Realtime API to provide additional features optimized for conversational AI:
 
 - **Intelligent segmentation**: groups words into meaningful speech segments per speaker.
 - **Turn detection**: automatically detects when speakers finish talking.
@@ -39,7 +39,8 @@ Use the Realtime SDK when:
 
 ### 1. Create an API key
 
-[Create an API key in the portal](https://portal.speechmatics.com/settings/api-keys) to access the Voice SDK. Store your key securely as a managed secret.
+[Create a Speechmatics API key in the portal](https://portal.speechmatics.com/settings/api-keys) to access the Voice SDK.
+Store your key securely as a managed secret.
 
 ### 2. Install dependencies
 
@@ -51,37 +52,114 @@ pip install speechmatics-voice
 pip install speechmatics-voice[smart]
 ```
 
-### 3. Configure 
+### 3. Quickstart
+
+Here's how to stream microphone audio to the Voice Agent and transcribe finalised segments of speech, with speaker ID:
+
+```python
+import asyncio
+import os
+from speechmatics.rt import Microphone
+from speechmatics.voice import VoiceAgentClient, AgentServerMessageType
+
+async def main():
+    """Stream microphone audio to Speechmatics Voice Agent using 'scribe' preset"""
+
+    # Audio configuration
+    SAMPLE_RATE = 16000         # Hz
+    CHUNK_SIZE = 160            # Samples per read
+    PRESET = "scribe"           # Configuration preset
+
+    # Create client with preset
+    client = VoiceAgentClient(
+        api_key=os.getenv("SPEECHMATICS_API_KEY"),
+        preset=PRESET
+    )
+
+    # Print finalised segments of speech with speaker ID
+    @client.on(AgentServerMessageType.ADD_SEGMENT)
+    def on_segment(message):
+        for segment in message["segments"]:
+            speaker = segment["speaker_id"]
+            text = segment["text"]
+            print(f"{speaker}: {text}")
+
+    # Setup microphone
+    mic = Microphone(SAMPLE_RATE, CHUNK_SIZE)
+    if not mic.start():
+        print("Error: Microphone not available")
+        return
+
+    # Connect to the Voice Agent
+    await client.connect()
+
+    # Stream microphone audio (interruptible using keyboard)
+    try:
+        while True:
+            audio_chunk = await mic.read(CHUNK_SIZE)
+            if not audio_chunk:
+                break # Microphone stopped producing data
+            await client.send_audio(audio_chunk)
+    except KeyboardInterrupt:
+        pass
+    finally:
+        await client.disconnect()
+
+if __name__ == "__main__":
+    asyncio.run(main())
 
-Replace `YOUR_API_KEY` with your actual API key from the portal:
+```
+
+#### Presets - the simplest way to get started
+These are optimized configurations for common use cases and require no further settings:
+
+`fast` - low latency, fast responses
+
+`adaptive` - natural dialogue in conversation
+
+`smart_turn` - advanced conversation, with ML turn detection
+
+`external` - external end of turn - endpointing handled by the client
+
+`scribe` - note-taking
+
+`captions` - live captioning
+
+To view all available presets:
+```python
+presets = VoiceAgentConfigPreset.list_presets()
+```
 
+### 4. Custom configurations
+
+For more control, you can also specify custom configurations:
 <Tabs>
-  <TabItem value="python-sdk" label="Python (Voice SDK)">
-  <CodeBlock language="python">
-    {pythonVoiceQuickstart}
-  </CodeBlock>
+<TabItem value='voice-custom-config' label='Custom configurations'>
+Specify configurations in a `VoiceAgentConfig` object:
+<CodeBlock language="python">
+    {pythonVoiceCustomConfig}
+</CodeBlock>
 </TabItem>
-<TabItem value='voice-presets' label='Voice SDK presets'>
+<TabItem value='voice-custom-config-overlays' label='Custom configurations with overlays'>
+Use presets as a starting point and customise with overlays:
 <CodeBlock language="python">
-    {pythonVoicePresets}
+    {pythonVoiceConfigOverlays}
 </CodeBlock>
 </TabItem>
-<TabItem value='voice-custom-config' label='Custom config'>
+<TabItem value='voice-custom-config-serialization' label='Configuration serialization'>
+Export or import configurations using JSON:
 <CodeBlock language="python">
-    {pythonVoiceCustomConfig}
+    {pythonVoiceConfigSerialization}
 </CodeBlock>
 </TabItem>
 </Tabs>
 
-## FAQ
+Note: If no config or preset is provided, the client will default to the external preset.
 
-### Implementation and deployment
 
-<details>
-<summary>Can I deploy this in my own environment?</summary>
 
-Yes! The Voice SDK can be consumed via our managed service or deployed in your own environment. To learn more about on-premises deployment options, [speak to sales](https://www.speechmatics.com/speak-to-sales).
-</details>
+
+## FAQ
 
 ### Support
 

From 85300b16f911b1c020adbf091f4bdce0170f7548 Mon Sep 17 00:00:00 2001
From: lgavincrl <lucy.gavin@speechmatics.com>
Date: Thu, 18 Dec 2025 15:50:18 +0000
Subject: [PATCH 2/6] Remove quickstart.mdx, a duplicate of the overview doc

---
 docs/voice-agents/quickstart.mdx | 110 -------------------------------
 1 file changed, 110 deletions(-)
 delete mode 100644 docs/voice-agents/quickstart.mdx

diff --git a/docs/voice-agents/quickstart.mdx b/docs/voice-agents/quickstart.mdx
deleted file mode 100644
index 5b3dad73..00000000
--- a/docs/voice-agents/quickstart.mdx
+++ /dev/null
@@ -1,110 +0,0 @@
----
-description: Learn how to build voice-enabled applications with the Speechmatics voice SDK
----
-import Admonition from '@theme/Admonition';
-import CodeBlock from '@theme/CodeBlock';
-import Tabs from '@theme/Tabs';
-import TabItem from '@theme/TabItem';
-
-import pythonVoiceQuickstart from "./assets/basic-quickstart.py?raw"
-import pythonVoicePresets from "./assets/presets.py?raw"
-import pythonVoiceCustomConfig from "./assets/custom-config.py?raw"
-
-# Voice agent overview
-
-The voice SDK builds on our real-time API to provide features optimized for conversational AI:
-
-- **Intelligent segmentation**: groups words into meaningful speech segments per speaker.
-- **Turn detection**: automatically detects when speakers finish talking.
-- **Speaker management**: focus on or ignore specific speakers in multi-speaker scenarios.
-- **Preset configurations**: offers ready-to-use settings for conversations, note-taking, and captions.
-- **Simplified event handling**: delivers clean, structured segments instead of raw word-level events.
-
-### When to use the voice SDK vs real-time SDK
-
-Use the voice SDK when:
-
-- Building conversational AI or voice agents
-- You need automatic turn detection
-- You want speaker-focused transcription
-- You need ready-to-use presets for common scenarios
-
-Use the realtime SDK when:
-
-- You need the raw stream of word-by-word transcription data
-- Building custom segmentation logic
-- You want fine-grained control over every event
-- Processing batch files or custom workflows
-
-## Getting started
-
-### 1. Get your API key
-
-[Create an API key in the portal](https://portal.speechmatics.com/settings/api-keys) to access the voice SDK. Store your key securely as a managed secret.
-
-### 2. Install dependencies
-
-```bash
-# Standard installation
-pip install speechmatics-voice
-
-# With SMART_TURN (ML-based turn detection)
-pip install speechmatics-voice[smart]
-```
-
-### 3. Quickstart
-
-Replace `YOUR_API_KEY` with your actual API key from the portal:
-
-<Tabs>
-  <TabItem value="python-sdk" label="Python (Voice SDK)">
-  <CodeBlock language="python">
-    {pythonVoiceQuickstart}
-  </CodeBlock>
-</TabItem>
-<TabItem value='voice-presets' label='Voice SDK presets'>
-<CodeBlock language="python">
-    {pythonVoicePresets}
-</CodeBlock>
-</TabItem>
-<TabItem value='voice-custom-config' label='Custom config'>
-<CodeBlock language="python">
-    {pythonVoiceCustomConfig}
-</CodeBlock>
-</TabItem>
-</Tabs>
-
-## FAQ
-
-### Implementation and deployment
-
-<details>
-<summary>Can I deploy this in my own environment?</summary>
-
-Yes! The voice agent SDK can be consumed via our managed service or deployed in your own environment. To learn more about on-premises deployment options, [speak to sales](https://www.speechmatics.com/speak-to-sales).
-</details>
-
-### Support
-
-<details>
-<summary>Where can I provide feedback or get help?</summary>
-
-You can submit feedback, bug reports, or feature requests through the Speechmatics [GitHub discussions](https://github.com/orgs/speechmatics/discussions).
-</details>
-
-## Next steps
-
-For more information, see the [voice agent Python SDK](https://github.com/speechmatics/speechmatics-python-sdk/tree/main/sdk/voice) on github.
-
-To learn more, check out [the Speechmatics academy](https://github.com/speechmatics/speechmatics-academy)
-
-Ready to build something amazing with our voice agent SDK? We'd love to hear about your project and help you succeed.
-
-**Get in touch with us:**
-- Share your feedback and feature requests
-- Ask questions about implementation
-- Discuss enterprise pricing and custom voices
-- Report any issues or bugs you encounter
-
-[Contact our team](https://support.speechmatics.com) or join our developer community (https://www.reddit.com/r/Speechmatics) to connect with other builders using text to speech.
-

From def273e1d736219e1fff988c30b000e4aea21cec Mon Sep 17 00:00:00 2001
From: lgavincrl <lucy.gavin@speechmatics.com>
Date: Thu, 18 Dec 2025 15:50:48 +0000
Subject: [PATCH 3/6] Add configuration constants and improve code doc

---
 docs/voice-agents/assets/basic-quickstart.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/docs/voice-agents/assets/basic-quickstart.py b/docs/voice-agents/assets/basic-quickstart.py
index e649dbec..621f902d 100644
--- a/docs/voice-agents/assets/basic-quickstart.py
+++ b/docs/voice-agents/assets/basic-quickstart.py
@@ -4,10 +4,17 @@
 from speechmatics.voice import VoiceAgentClient, AgentServerMessageType
 
 async def main():
+    """Stream microphone audio to Speechmatics Voice Agent using 'scribe' preset"""
+
+    # Audio configuration
+    SAMPLE_RATE = 16000         # Hz
+    CHUNK_SIZE = 160            # Samples per read
+    PRESET = "scribe"           # Configuration preset
+
     # Create client with preset
     client = VoiceAgentClient(
         api_key=os.getenv("YOUR_API_KEY"),
-        preset="scribe"
+        preset=PRESET
     )
 
     # Handle final segments
@@ -19,17 +26,20 @@ def on_segment(message):
             print(f"{speaker}: {text}")
 
     # Setup microphone
-    mic = Microphone(sample_rate=16000, chunk_size=320)
+    mic = Microphone(SAMPLE_RATE, CHUNK_SIZE)
     if not mic.start():
         print("Error: Microphone not available")
         return
 
-    # Connect and stream
+    # Connect to the Voice Agent
     await client.connect()
 
+    # Stream microphone audio (interruptible using keyboard)
     try:
         while True:
-            audio_chunk = await mic.read(320)
+            audio_chunk = await mic.read(CHUNK_SIZE)
+            if not audio_chunk:
+                break # Microphone stopped producing data
             await client.send_audio(audio_chunk)
     except KeyboardInterrupt:
         pass

From 85795022b426edf0d062f012f5c5b7dcd252afe7 Mon Sep 17 00:00:00 2001
From: lgavincrl <lucy.gavin@speechmatics.com>
Date: Thu, 18 Dec 2025 15:51:03 +0000
Subject: [PATCH 4/6] Add example code for Voice SDK configuration overlays
 with preset customization

---
 docs/voice-agents/assets/config-overlays.py | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 docs/voice-agents/assets/config-overlays.py

diff --git a/docs/voice-agents/assets/config-overlays.py b/docs/voice-agents/assets/config-overlays.py
new file mode 100644
index 00000000..be840baa
--- /dev/null
+++ b/docs/voice-agents/assets/config-overlays.py
@@ -0,0 +1,9 @@
+from speechmatics.voice import VoiceAgentConfigPreset, VoiceAgentConfig
+
+# Use preset with custom overrides
+config = VoiceAgentConfigPreset.SCRIBE(
+    VoiceAgentConfig(
+        language="es",
+        max_delay=0.8
+    )
+)

From 03af5e2d2a9e7279ccc383907945ee40b2b03d01 Mon Sep 17 00:00:00 2001
From: lgavincrl <lucy.gavin@speechmatics.com>
Date: Thu, 18 Dec 2025 15:51:24 +0000
Subject: [PATCH 5/6] Add example code for Voice SDK configuration
 serialization with JSON import/export

---
 docs/voice-agents/assets/config-serialization.py | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 docs/voice-agents/assets/config-serialization.py

diff --git a/docs/voice-agents/assets/config-serialization.py b/docs/voice-agents/assets/config-serialization.py
new file mode 100644
index 00000000..a2d3eb4b
--- /dev/null
+++ b/docs/voice-agents/assets/config-serialization.py
@@ -0,0 +1,10 @@
+from speechmatics.voice import VoiceAgentConfigPreset, VoiceAgentConfig
+
+# Export preset to JSON
+config_json = VoiceAgentConfigPreset.SCRIBE().to_json()
+
+# Load from JSON
+config = VoiceAgentConfig.from_json(config_json)
+
+# Or create from JSON string
+config = VoiceAgentConfig.from_json('{"language": "en", "enable_diarization": true}')
\ No newline at end of file

From 8d6187bd9246d94a0cb38c835148b9256247c3f3 Mon Sep 17 00:00:00 2001
From: lgavincrl <lucy.gavin@speechmatics.com>
Date: Fri, 19 Dec 2025 16:12:37 +0000
Subject: [PATCH 6/6] Update preset descriptions and configuration for clarity

---
 docs/voice-agents/overview.mdx | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/docs/voice-agents/overview.mdx b/docs/voice-agents/overview.mdx
index 9f989f2e..0dcdc8d3 100644
--- a/docs/voice-agents/overview.mdx
+++ b/docs/voice-agents/overview.mdx
@@ -111,15 +111,15 @@ if __name__ == "__main__":
 ```
 
 #### Presets - the simplest way to get started
-These are optimized configurations for common use cases and require no further settings:
+These are purpose-built, optimized configurations, ready for use without further modification:
 
 `fast` - low latency, fast responses
 
-`adaptive` - natural dialogue in conversation
+`adaptive` - general conversation
 
-`smart_turn` - advanced conversation, with ML turn detection
+`smart_turn` - complex conversation
 
-`external` - external end of turn - endpointing handled by the client
+`external` - user handles end of turn
 
 `scribe` - note-taking
 
@@ -132,7 +132,7 @@ presets = VoiceAgentConfigPreset.list_presets()
 
 ### 4. Custom configurations
 
-For more control, you can also specify custom configurations:
+For more control, you can also specify custom configurations or use presets as a starting point and customise with overlays:
 <Tabs>
 <TabItem value='voice-custom-config' label='Custom configurations'>
 Specify configurations in a `VoiceAgentConfig` object:
@@ -140,27 +140,20 @@ Specify configurations in a `VoiceAgentConfig` object:
     {pythonVoiceCustomConfig}
 </CodeBlock>
 </TabItem>
-<TabItem value='voice-custom-config-overlays' label='Custom configurations with overlays'>
+<TabItem value='voice-custom-config-overlays' label='Preset with a custom overlay'>
 Use presets as a starting point and customise with overlays:
 <CodeBlock language="python">
     {pythonVoiceConfigOverlays}
 </CodeBlock>
 </TabItem>
-<TabItem value='voice-custom-config-serialization' label='Configuration serialization'>
-Export or import configurations using JSON:
-<CodeBlock language="python">
-    {pythonVoiceConfigSerialization}
-</CodeBlock>
-</TabItem>
 </Tabs>
 
-Note: If no config or preset is provided, the client will default to the external preset.
+Note: If no configuration or preset is provided, the client will default to the `external` preset.
 
 
 
 
 ## FAQ
-
 ### Support
 
 <details>
@@ -171,7 +164,7 @@ You can submit feedback, bug reports, or feature requests through the Speechmati
 
 ## Next steps
 
-For more information, see the [Voice SDK](https://github.com/speechmatics/speechmatics-python-sdk/tree/main/sdk/voice) on github.
+For more information, see the [Voice SDK](https://github.com/speechmatics/speechmatics-python-sdk/tree/main/sdk/voice) on GitHub.
 
 To learn more, check out [the Speechmatics Academy](https://github.com/speechmatics/speechmatics-academy).