diff --git a/docs/voice-agents/assets/basic-quickstart.py b/docs/voice-agents/assets/basic-quickstart.py
index e649dbec..621f902d 100644
--- a/docs/voice-agents/assets/basic-quickstart.py
+++ b/docs/voice-agents/assets/basic-quickstart.py
@@ -4,10 +4,17 @@
from speechmatics.voice import VoiceAgentClient, AgentServerMessageType
async def main():
+ """Stream microphone audio to Speechmatics Voice Agent using 'scribe' preset"""
+
+ # Audio configuration
+ SAMPLE_RATE = 16000 # Hz
+ CHUNK_SIZE = 160 # Samples per read
+ PRESET = "scribe" # Configuration preset
+
# Create client with preset
client = VoiceAgentClient(
api_key=os.getenv("YOUR_API_KEY"),
- preset="scribe"
+ preset=PRESET
)
# Handle final segments
@@ -19,17 +26,20 @@ def on_segment(message):
print(f"{speaker}: {text}")
# Setup microphone
- mic = Microphone(sample_rate=16000, chunk_size=320)
+    mic = Microphone(sample_rate=SAMPLE_RATE, chunk_size=CHUNK_SIZE)
if not mic.start():
print("Error: Microphone not available")
return
- # Connect and stream
+    # Connect to the Voice Agent
await client.connect()
+    # Stream microphone audio (interruptible with Ctrl+C)
try:
while True:
- audio_chunk = await mic.read(320)
+ audio_chunk = await mic.read(CHUNK_SIZE)
+ if not audio_chunk:
+ break # Microphone stopped producing data
await client.send_audio(audio_chunk)
except KeyboardInterrupt:
pass
diff --git a/docs/voice-agents/assets/config-overlays.py b/docs/voice-agents/assets/config-overlays.py
new file mode 100644
index 00000000..be840baa
--- /dev/null
+++ b/docs/voice-agents/assets/config-overlays.py
@@ -0,0 +1,9 @@
+from speechmatics.voice import VoiceAgentConfigPreset, VoiceAgentConfig
+
+# Use preset with custom overrides
+config = VoiceAgentConfigPreset.SCRIBE(
+ VoiceAgentConfig(
+ language="es",
+ max_delay=0.8
+ )
+)
diff --git a/docs/voice-agents/assets/config-serialization.py b/docs/voice-agents/assets/config-serialization.py
new file mode 100644
index 00000000..a2d3eb4b
--- /dev/null
+++ b/docs/voice-agents/assets/config-serialization.py
@@ -0,0 +1,10 @@
+from speechmatics.voice import VoiceAgentConfigPreset, VoiceAgentConfig
+
+# Export preset to JSON
+config_json = VoiceAgentConfigPreset.SCRIBE().to_json()
+
+# Load from JSON
+config = VoiceAgentConfig.from_json(config_json)
+
+# Or create from JSON string
+config = VoiceAgentConfig.from_json('{"language": "en", "enable_diarization": true}')
\ No newline at end of file
diff --git a/docs/voice-agents/overview.mdx b/docs/voice-agents/overview.mdx
index 4f7dced0..0dcdc8d3 100644
--- a/docs/voice-agents/overview.mdx
+++ b/docs/voice-agents/overview.mdx
@@ -1,17 +1,17 @@
---
-description: Learn how to build voice-enabled applications with the Speechmatics Voice SDK
+description: Learn how to build voice-enabled applications with the Speechmatics Voice SDK
---
import Admonition from '@theme/Admonition';
import CodeBlock from '@theme/CodeBlock';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
-import pythonVoiceQuickstart from "./assets/basic-quickstart.py?raw"
-import pythonVoicePresets from "./assets/presets.py?raw"
import pythonVoiceCustomConfig from "./assets/custom-config.py?raw"
+import pythonVoiceConfigOverlays from "./assets/config-overlays.py?raw"
+import pythonVoiceConfigSerialization from "./assets/config-serialization.py?raw"
-# Voice agents overview
-The Voice SDK builds on our Realtime API to provide features optimized for conversational AI:
+# Voice SDK overview
+The Voice SDK is a Python library that builds on our Realtime API to provide additional features optimized for conversational AI:
- **Intelligent segmentation**: groups words into meaningful speech segments per speaker.
- **Turn detection**: automatically detects when speakers finish talking.
@@ -39,7 +39,8 @@ Use the Realtime SDK when:
### 1. Create an API key
-[Create an API key in the portal](https://portal.speechmatics.com/settings/api-keys) to access the Voice SDK. Store your key securely as a managed secret.
+[Create a Speechmatics API key in the portal](https://portal.speechmatics.com/settings/api-keys) to access the Voice SDK.
+Store your key securely as a managed secret.
### 2. Install dependencies
@@ -51,38 +52,108 @@ pip install speechmatics-voice
pip install speechmatics-voice[smart]
```
-### 3. Configure
+### 3. Quickstart
+
+Here's how to stream microphone audio to the Voice Agent and print finalised segments of speech with speaker IDs:
+
+```python
+import asyncio
+import os
+from speechmatics.rt import Microphone
+from speechmatics.voice import VoiceAgentClient, AgentServerMessageType
+
+async def main():
+ """Stream microphone audio to Speechmatics Voice Agent using 'scribe' preset"""
+
+ # Audio configuration
+ SAMPLE_RATE = 16000 # Hz
+ CHUNK_SIZE = 160 # Samples per read
+ PRESET = "scribe" # Configuration preset
+
+ # Create client with preset
+ client = VoiceAgentClient(
+ api_key=os.getenv("SPEECHMATICS_API_KEY"),
+ preset=PRESET
+ )
+
+ # Print finalised segments of speech with speaker ID
+ @client.on(AgentServerMessageType.ADD_SEGMENT)
+ def on_segment(message):
+ for segment in message["segments"]:
+ speaker = segment["speaker_id"]
+ text = segment["text"]
+ print(f"{speaker}: {text}")
+
+ # Setup microphone
+    mic = Microphone(sample_rate=SAMPLE_RATE, chunk_size=CHUNK_SIZE)
+ if not mic.start():
+ print("Error: Microphone not available")
+ return
+
+ # Connect to the Voice Agent
+ await client.connect()
+
+    # Stream microphone audio (interruptible with Ctrl+C)
+ try:
+ while True:
+ audio_chunk = await mic.read(CHUNK_SIZE)
+ if not audio_chunk:
+ break # Microphone stopped producing data
+ await client.send_audio(audio_chunk)
+ except KeyboardInterrupt:
+ pass
+ finally:
+ await client.disconnect()
+
+if __name__ == "__main__":
+ asyncio.run(main())
-Replace `YOUR_API_KEY` with your actual API key from the portal:
+```
+
+#### Presets: the simplest way to get started
+Presets are purpose-built, optimized configurations that are ready to use without further modification:
+
+- `fast` - low latency, fast responses
+- `adaptive` - general conversation
+- `smart_turn` - complex conversation
+- `external` - user handles end of turn
+- `scribe` - note-taking
+- `captions` - live captioning
+
+To view all available presets:
+```python
+from speechmatics.voice import VoiceAgentConfigPreset
+
+presets = VoiceAgentConfigPreset.list_presets()
+```
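+
+For example, to switch the quickstart above to live captioning, pass a different preset name to the client constructor (a minimal sketch reusing the quickstart's arguments):
+
+```python
+import os
+from speechmatics.voice import VoiceAgentClient
+
+# Same constructor as the quickstart, selecting the 'captions' preset
+client = VoiceAgentClient(
+    api_key=os.getenv("SPEECHMATICS_API_KEY"),
+    preset="captions",
+)
+```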
+
+### 4. Custom configurations
+For more control, you can also specify a custom configuration, or use a preset as a starting point and customise it with overlays:
-
-
- {pythonVoiceQuickstart}
-
-
-
+
+Specify configurations in a `VoiceAgentConfig` object:
- {pythonVoicePresets}
+<CodeBlock language="python">
+  {pythonVoiceCustomConfig}
+</CodeBlock>
-
+
+Use presets as a starting point and customise with overlays:
- {pythonVoiceCustomConfig}
+<CodeBlock language="python">
+  {pythonVoiceConfigOverlays}
+</CodeBlock>
-## FAQ
+Note: If no configuration or preset is provided, the client will default to the `external` preset.
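+
+As a minimal sketch of that default, constructing the client with only an API key falls back to the `external` preset:
+
+```python
+import os
+from speechmatics.voice import VoiceAgentClient
+
+# No preset or config given, so the 'external' preset applies
+client = VoiceAgentClient(api_key=os.getenv("SPEECHMATICS_API_KEY"))
+```
+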
-### Implementation and deployment
-
-Can I deploy this in my own environment?
-Yes! The Voice SDK can be consumed via our managed service or deployed in your own environment. To learn more about on-premises deployment options, [speak to sales](https://www.speechmatics.com/speak-to-sales).
-
+## FAQ
### Support
@@ -93,7 +164,7 @@ You can submit feedback, bug reports, or feature requests through the Speechmati
## Next steps
-For more information, see the [Voice SDK](https://github.com/speechmatics/speechmatics-python-sdk/tree/main/sdk/voice) on github.
+For more information, see the [Voice SDK](https://github.com/speechmatics/speechmatics-python-sdk/tree/main/sdk/voice) on GitHub.
To learn more, check out [the Speechmatics Academy](https://github.com/speechmatics/speechmatics-academy).
diff --git a/docs/voice-agents/quickstart.mdx b/docs/voice-agents/quickstart.mdx
deleted file mode 100644
index 5b3dad73..00000000
--- a/docs/voice-agents/quickstart.mdx
+++ /dev/null
@@ -1,110 +0,0 @@
----
-description: Learn how to build voice-enabled applications with the Speechmatics voice SDK
----
-import Admonition from '@theme/Admonition';
-import CodeBlock from '@theme/CodeBlock';
-import Tabs from '@theme/Tabs';
-import TabItem from '@theme/TabItem';
-
-import pythonVoiceQuickstart from "./assets/basic-quickstart.py?raw"
-import pythonVoicePresets from "./assets/presets.py?raw"
-import pythonVoiceCustomConfig from "./assets/custom-config.py?raw"
-
-# Voice agent overview
-
-The voice SDK builds on our real-time API to provide features optimized for conversational AI:
-
-- **Intelligent segmentation**: groups words into meaningful speech segments per speaker.
-- **Turn detection**: automatically detects when speakers finish talking.
-- **Speaker management**: focus on or ignore specific speakers in multi-speaker scenarios.
-- **Preset configurations**: offers ready-to-use settings for conversations, note-taking, and captions.
-- **Simplified event handling**: delivers clean, structured segments instead of raw word-level events.
-
-### When to use the voice SDK vs real-time SDK
-
-Use the voice SDK when:
-
-- Building conversational AI or voice agents
-- You need automatic turn detection
-- You want speaker-focused transcription
-- You need ready-to-use presets for common scenarios
-
-Use the realtime SDK when:
-
-- You need the raw stream of word-by-word transcription data
-- Building custom segmentation logic
-- You want fine-grained control over every event
-- Processing batch files or custom workflows
-
-## Getting started
-
-### 1. Get your API key
-
-[Create an API key in the portal](https://portal.speechmatics.com/settings/api-keys) to access the voice SDK. Store your key securely as a managed secret.
-
-### 2. Install dependencies
-
-```bash
-# Standard installation
-pip install speechmatics-voice
-
-# With SMART_TURN (ML-based turn detection)
-pip install speechmatics-voice[smart]
-```
-
-### 3. Quickstart
-
-Replace `YOUR_API_KEY` with your actual API key from the portal:
-
-
-
-
- {pythonVoiceQuickstart}
-
-
-
-
- {pythonVoicePresets}
-
-
-
-
- {pythonVoiceCustomConfig}
-
-
-
-
-## FAQ
-
-### Implementation and deployment
-
-
-Can I deploy this in my own environment?
-
-Yes! The voice agent SDK can be consumed via our managed service or deployed in your own environment. To learn more about on-premises deployment options, [speak to sales](https://www.speechmatics.com/speak-to-sales).
-
-
-### Support
-
-
-Where can I provide feedback or get help?
-
-You can submit feedback, bug reports, or feature requests through the Speechmatics [GitHub discussions](https://github.com/orgs/speechmatics/discussions).
-
-
-## Next steps
-
-For more information, see the [voice agent Python SDK](https://github.com/speechmatics/speechmatics-python-sdk/tree/main/sdk/voice) on github.
-
-To learn more, check out [the Speechmatics academy](https://github.com/speechmatics/speechmatics-academy)
-
-Ready to build something amazing with our voice agent SDK? We'd love to hear about your project and help you succeed.
-
-**Get in touch with us:**
-- Share your feedback and feature requests
-- Ask questions about implementation
-- Discuss enterprise pricing and custom voices
-- Report any issues or bugs you encounter
-
-[Contact our team](https://support.speechmatics.com) or join our developer community (https://www.reddit.com/r/Speechmatics) to connect with other builders using text to speech.
-